# Course: EDF 6436 - Theory of Measurement
# Assignment: EFA
# Author: Nguyet Hoang | hoangt@ufl.edu
# Set up R
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(conflicted)
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package.
library(psych)
library(scales)
library(GPArotation)
# Read data
data <- read.csv("ToMassignment1data.csv", sep = ";", header = TRUE)
view(data)
# Univariate descriptives
describe(data)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## V1 1 225 0.67 0.47 1 0.71 0 0 1 1 -0.72 -1.48 0.03
## V2 2 225 0.66 0.47 1 0.70 0 0 1 1 -0.68 -1.54 0.03
## V3 3 225 0.46 0.50 0 0.45 0 0 1 1 0.17 -1.98 0.03
## V4 4 225 0.39 0.49 0 0.36 0 0 1 1 0.46 -1.79 0.03
## V5 5 225 0.48 0.50 0 0.48 0 0 1 1 0.06 -2.01 0.03
## V6 6 225 0.54 0.50 1 0.55 0 0 1 1 -0.15 -1.99 0.03
## V7 7 225 0.74 0.44 1 0.80 0 0 1 1 -1.10 -0.79 0.03
## V8 8 225 0.78 0.42 1 0.85 0 0 1 1 -1.33 -0.24 0.03
## V9 9 225 0.36 0.48 0 0.32 0 0 1 1 0.60 -1.65 0.03
## V10 10 225 0.28 0.45 0 0.23 0 0 1 1 0.97 -1.06 0.03
## V11 11 225 0.70 0.46 1 0.75 0 0 1 1 -0.88 -1.23 0.03
## V12 12 225 0.77 0.42 1 0.83 0 0 1 1 -1.27 -0.40 0.03
## V13 13 225 0.71 0.45 1 0.76 0 0 1 1 -0.93 -1.15 0.03
## V14 14 225 0.48 0.50 0 0.48 0 0 1 1 0.06 -2.01 0.03
## V15 15 225 0.72 0.45 1 0.78 0 0 1 1 -1.00 -1.01 0.03
## V16 16 225 0.70 0.46 1 0.75 0 0 1 1 -0.88 -1.23 0.03
## V17 17 225 0.41 0.49 0 0.39 0 0 1 1 0.35 -1.89 0.03
## V18 18 225 0.37 0.48 0 0.34 0 0 1 1 0.52 -1.74 0.03
## V19 19 225 0.61 0.49 1 0.64 0 0 1 1 -0.44 -1.81 0.03
## V20 20 225 0.47 0.50 0 0.46 0 0 1 1 0.11 -2.00 0.03
# Step 1: Extract factors - report unrotated number of factors
# 1.1. Using principal components method for factor extraction
tet_matrix <- tetrachoric(data)$rho
options(mc.cores = 1)
pc_result <- fa.parallel(tet_matrix, fa = "pc")
## Warning in fa.parallel(tet_matrix, fa = "pc"): It seems as if you are using a
## correlation matrix, but have not specified the number of cases. The number of
## subjects is arbitrarily set to be 100

## Parallel analysis suggests that the number of factors = NA and the number of components = 2
pc_actual_eigenvalues <- pc_result$pc.values
pc_simulated_eigenvalues <- pc_result$pc.sim
pc_eigenvalues_table <- data.frame(
Component = 1:length(pc_actual_eigenvalues),
Actual_Eigenvalue = pc_actual_eigenvalues,
Simulated_Eigenvalue = pc_simulated_eigenvalues
)
pc_eigenvalues_table
## Component Actual_Eigenvalue Simulated_Eigenvalue
## 1 1 5.25410717 1.8476353
## 2 2 2.07757639 1.7028986
## 3 3 1.57314867 1.5856240
## 4 4 1.44709248 1.4767673
## 5 5 1.37308416 1.3784481
## 6 6 1.21261177 1.2747409
## 7 7 0.99725182 1.2195438
## 8 8 0.88970057 1.1332806
## 9 9 0.83930492 1.0514939
## 10 10 0.75976102 0.9682943
## 11 11 0.73935061 0.9051813
## 12 12 0.58867605 0.8351319
## 13 13 0.55321288 0.7913555
## 14 14 0.44494445 0.7184168
## 15 15 0.40192293 0.6647296
## 16 16 0.34363186 0.6085670
## 17 17 0.20053937 0.5512506
## 18 18 0.16968126 0.4975449
## 19 19 0.08229803 0.4267078
## 20 20 0.05210361 0.3623879
# With the principal components method, it suggests that the number of factors = 2
# 1.2. Using common factors model for factor extraction
cf_result <- fa.parallel(tet_matrix, fa = "fa")
## Warning in fa.parallel(tet_matrix, fa = "fa"): It seems as if you are using a
## correlation matrix, but have not specified the number of cases. The number of
## subjects is arbitrarily set to be 100

## Parallel analysis suggests that the number of factors = 6 and the number of components = NA
cf_actual_eigenvalues <- cf_result$fa.values
cf_simulated_eigenvalues <- cf_result$fa.sim
cf_eigenvalues_table <- data.frame(
Factor = 1:length(cf_actual_eigenvalues),
Actual_Eigenvalue = cf_actual_eigenvalues,
Simulated_Eigenvalue = cf_simulated_eigenvalues
)
cf_eigenvalues_table
## Factor Actual_Eigenvalue Simulated_Eigenvalue
## 1 1 4.53021531 1.05225411
## 2 2 1.27337346 0.76624560
## 3 3 0.75793914 0.61553041
## 4 4 0.69951360 0.51830026
## 5 5 0.62917335 0.42056453
## 6 6 0.39972223 0.32406979
## 7 7 0.25091458 0.25193111
## 8 8 0.10331685 0.17645156
## 9 9 0.07595653 0.09274877
## 10 10 -0.03877759 0.02055269
## 11 11 -0.12832065 -0.03974797
## 12 12 -0.18119492 -0.11310174
## 13 13 -0.22869788 -0.17095139
## 14 14 -0.32602132 -0.22225797
## 15 15 -0.36530473 -0.28626087
## 16 16 -0.45151054 -0.35074428
## 17 17 -0.53364304 -0.40411937
## 18 18 -0.60505757 -0.46819774
## 19 19 -0.62789201 -0.53278039
## 20 20 -0.70348954 -0.59823449
# With the common factors model, it suggests that the number of factors = 5
# Step 2: Rotating the factors
# 2.1. Orthogonal rotation (Varimax)
ortho_result <- fa(data, nfactors = 2, rotate = "varimax")
print(ortho_result, cut = 0.4, sort = TRUE)
## Factor Analysis using method = minres
## Call: fa(r = data, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item MR1 MR2 h2 u2 com
## V13 13 0.67 0.479 0.52 1.1
## V7 7 0.42 0.264 0.74 1.8
## V8 8 0.42 0.190 0.81 1.2
## V17 17 0.41 0.192 0.81 1.2
## V14 14 0.41 0.208 0.79 1.4
## V20 20 0.41 0.268 0.73 1.9
## V12 12 0.41 0.179 0.82 1.2
## V19 19 0.140 0.86 1.6
## V2 2 0.105 0.90 1.1
## V3 3 0.141 0.86 1.8
## V1 1 0.160 0.84 2.0
## V10 10 0.024 0.98 1.8
## V11 11 0.59 0.348 0.65 1.0
## V9 9 0.52 0.273 0.73 1.0
## V6 6 0.214 0.79 1.8
## V5 5 0.146 0.85 1.1
## V16 16 0.144 0.86 1.3
## V15 15 0.121 0.88 1.5
## V18 18 0.110 0.89 2.0
## V4 4 0.026 0.97 1.0
##
## MR1 MR2
## SS loadings 2.05 1.68
## Proportion Var 0.10 0.08
## Cumulative Var 0.10 0.19
## Proportion Explained 0.55 0.45
## Cumulative Proportion 0.55 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 190 with the objective function = 2.89 with Chi Square = 626.1
## df of the model are 151 and the objective function was 1
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.07
##
## The harmonic n.obs is 225 with the empirical chi square 148.71 with prob < 0.54
## The total n.obs was 225 with Likelihood Chi Square = 215.93 with prob < 0.00042
##
## Tucker Lewis Index of factoring reliability = 0.811
## RMSEA index = 0.043 and the 90 % confidence intervals are 0.03 0.056
## BIC = -601.9
## Fit based upon off diagonal values = 0.85
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.84 0.81
## Multiple R square of scores with factors 0.71 0.66
## Minimum correlation of possible factor scores 0.43 0.32
# 2.2. Oblique rotation (Oblimin)
oblique_result <- fa(data, nfactors = 5, rotate = "oblimin")
print(oblique_result, cut = 0.4, sort = TRUE)
## Factor Analysis using method = minres
## Call: fa(r = data, nfactors = 5, rotate = "oblimin")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item MR3 MR1 MR4 MR2 MR5 h2 u2 com
## V7 7 1.00 0.996 0.0039 1.0
## V2 2 0.142 0.8579 2.7
## V3 3 0.139 0.8615 4.1
## V18 18 0.55 0.292 0.7078 1.1
## V20 20 0.55 0.414 0.5855 1.3
## V1 1 0.241 0.7593 2.7
## V6 6 0.220 0.7797 2.6
## V19 19 0.63 0.414 0.5862 1.1
## V8 8 0.45 0.287 0.7134 1.5
## V16 16 0.235 0.7655 2.3
## V13 13 0.61 0.483 0.5174 1.3
## V17 17 0.43 0.291 0.7090 1.8
## V14 14 0.270 0.7298 2.2
## V12 12 0.337 0.6628 2.9
## V5 5 0.45 0.236 0.7639 1.2
## V9 9 0.44 0.291 0.7088 1.7
## V11 11 0.355 0.6453 2.4
## V15 15 0.180 0.8205 2.3
## V4 4 0.071 0.9286 1.4
## V10 10 0.084 0.9163 2.1
##
## MR3 MR1 MR4 MR2 MR5
## SS loadings 1.41 1.21 1.20 1.09 1.07
## Proportion Var 0.07 0.06 0.06 0.05 0.05
## Cumulative Var 0.07 0.13 0.19 0.25 0.30
## Proportion Explained 0.24 0.20 0.20 0.18 0.18
## Cumulative Proportion 0.24 0.44 0.64 0.82 1.00
##
## With factor correlations of
## MR3 MR1 MR4 MR2 MR5
## MR3 1.00 0.29 0.21 0.20 0.16
## MR1 0.29 1.00 0.25 0.18 0.27
## MR4 0.21 0.25 1.00 0.26 0.22
## MR2 0.20 0.18 0.26 1.00 -0.01
## MR5 0.16 0.27 0.22 -0.01 1.00
##
## Mean item complexity = 2
## Test of the hypothesis that 5 factors are sufficient.
##
## df null model = 190 with the objective function = 2.89 with Chi Square = 626.1
## df of the model are 100 and the objective function was 0.46
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic n.obs is 225 with the empirical chi square 55.5 with prob < 1
## The total n.obs was 225 with Likelihood Chi Square = 97.39 with prob < 0.56
##
## Tucker Lewis Index of factoring reliability = 1.012
## RMSEA index = 0 and the 90 % confidence intervals are 0 0.033
## BIC = -444.22
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy
## MR3 MR1 MR4 MR2 MR5
## Correlation of (regression) scores with factors 0.98 0.72 0.73 0.74 0.72
## Multiple R square of scores with factors 0.96 0.52 0.54 0.55 0.52
## Minimum correlation of possible factor scores 0.93 0.03 0.08 0.11 0.04