# Course: EDF 6436 - Theory of Measurement
# Assignment: EFA
# Author: Nguyet Hoang | hoangt@ufl.edu
# Set up R
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(conflicted)
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package.
library(psych)
library(scales)
library(GPArotation)
# Read data
data <- read.csv("ToMassignment1data.csv", sep = ";", header = TRUE)
view(data)
# Univariate descriptives
describe(data)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## V1 1 225 0.67 0.47 1 0.71 0 0 1 1 -0.72 -1.48 0.03
## V2 2 225 0.66 0.47 1 0.70 0 0 1 1 -0.68 -1.54 0.03
## V3 3 225 0.46 0.50 0 0.45 0 0 1 1 0.17 -1.98 0.03
## V4 4 225 0.39 0.49 0 0.36 0 0 1 1 0.46 -1.79 0.03
## V5 5 225 0.48 0.50 0 0.48 0 0 1 1 0.06 -2.01 0.03
## V6 6 225 0.54 0.50 1 0.55 0 0 1 1 -0.15 -1.99 0.03
## V7 7 225 0.74 0.44 1 0.80 0 0 1 1 -1.10 -0.79 0.03
## V8 8 225 0.78 0.42 1 0.85 0 0 1 1 -1.33 -0.24 0.03
## V9 9 225 0.36 0.48 0 0.32 0 0 1 1 0.60 -1.65 0.03
## V10 10 225 0.28 0.45 0 0.23 0 0 1 1 0.97 -1.06 0.03
## V11 11 225 0.70 0.46 1 0.75 0 0 1 1 -0.88 -1.23 0.03
## V12 12 225 0.77 0.42 1 0.83 0 0 1 1 -1.27 -0.40 0.03
## V13 13 225 0.71 0.45 1 0.76 0 0 1 1 -0.93 -1.15 0.03
## V14 14 225 0.48 0.50 0 0.48 0 0 1 1 0.06 -2.01 0.03
## V15 15 225 0.72 0.45 1 0.78 0 0 1 1 -1.00 -1.01 0.03
## V16 16 225 0.70 0.46 1 0.75 0 0 1 1 -0.88 -1.23 0.03
## V17 17 225 0.41 0.49 0 0.39 0 0 1 1 0.35 -1.89 0.03
## V18 18 225 0.37 0.48 0 0.34 0 0 1 1 0.52 -1.74 0.03
## V19 19 225 0.61 0.49 1 0.64 0 0 1 1 -0.44 -1.81 0.03
## V20 20 225 0.47 0.50 0 0.46 0 0 1 1 0.11 -2.00 0.03
# Step 1: Extract factors - report unrotated number of factors
# 1.1. Using principal components method for factor extraction
pc_result <- fa.parallel(data, fa = "pc")

## Parallel analysis suggests that the number of factors = NA and the number of components = 2
pc_actual_eigenvalues <- pc_result$pc.values
pc_simulated_eigenvalues <- pc_result$pc.sim
pc_eigenvalues_table <- data.frame(
Component = 1:length(pc_actual_eigenvalues),
Actual_Eigenvalue = pc_actual_eigenvalues,
Simulated_Eigenvalue = pc_simulated_eigenvalues
)
pc_eigenvalues_table
## Component Actual_Eigenvalue Simulated_Eigenvalue
## 1 1 3.5801707 1.5358605
## 2 2 1.6415253 1.4467587
## 3 3 1.3511136 1.3809128
## 4 4 1.2953122 1.3119726
## 5 5 1.2294961 1.2525381
## 6 6 1.1298351 1.1927684
## 7 7 0.9990179 1.1376473
## 8 8 0.9335611 1.1000611
## 9 9 0.9053272 1.0548242
## 10 10 0.8604354 1.0026892
## 11 11 0.8349938 0.9516663
## 12 12 0.7595781 0.9110127
## 13 13 0.7200692 0.8685861
## 14 14 0.6714808 0.8264659
## 15 15 0.6231413 0.7850590
## 16 16 0.6005184 0.7384335
## 17 17 0.5080066 0.7036627
## 18 18 0.5000640 0.6523678
## 19 19 0.4332614 0.5997090
## 20 20 0.4230918 0.5470041
# With the principal components method, it suggests that the number of factors = 2
# 1.2. Using common factors model for factor extraction
cf_result <- fa.parallel(data, fa = "fa")

## Parallel analysis suggests that the number of factors = 5 and the number of components = NA
cf_actual_eigenvalues <- cf_result$fa.values
cf_simulated_eigenvalues <- cf_result$fa.sim
cf_eigenvalues_table <- data.frame(
Factor = 1:length(cf_actual_eigenvalues),
Actual_Eigenvalue = cf_actual_eigenvalues,
Simulated_Eigenvalue = cf_simulated_eigenvalues
)
cf_eigenvalues_table
## Factor Actual_Eigenvalue Simulated_Eigenvalue
## 1 1 2.74870239 0.67810521
## 2 2 0.75848928 0.49904550
## 3 3 0.47503262 0.40609877
## 4 4 0.44104278 0.33722686
## 5 5 0.38349150 0.27024999
## 6 6 0.23957800 0.22145875
## 7 7 0.15569880 0.17519620
## 8 8 0.06725277 0.11803778
## 9 9 0.04572602 0.07200855
## 10 10 -0.03052189 0.02290730
## 11 11 -0.07657446 -0.02043572
## 12 12 -0.10579409 -0.06817762
## 13 13 -0.14273082 -0.10749052
## 14 14 -0.19914475 -0.14947462
## 15 15 -0.22654977 -0.18944912
## 16 16 -0.27398741 -0.23047638
## 17 17 -0.32591266 -0.27221873
## 18 18 -0.36467356 -0.31676497
## 19 19 -0.38546137 -0.35625797
## 20 20 -0.43496080 -0.41148512
# With the common factors model, it suggests that the number of factors = 5
# Step 2: Rotating the factors
# 2.1. Orthogonal rotation (Varimax)
ortho_result <- fa(data, nfactors = 2, rotate = "varimax")
print(ortho_result, cut = 0.4)
## Factor Analysis using method = minres
## Call: fa(r = data, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## V1 0.160 0.84 2.0
## V2 0.105 0.90 1.1
## V3 0.141 0.86 1.8
## V4 0.026 0.97 1.0
## V5 0.146 0.85 1.1
## V6 0.214 0.79 1.8
## V7 0.42 0.264 0.74 1.8
## V8 0.42 0.190 0.81 1.2
## V9 0.52 0.273 0.73 1.0
## V10 0.024 0.98 1.8
## V11 0.59 0.348 0.65 1.0
## V12 0.41 0.179 0.82 1.2
## V13 0.67 0.479 0.52 1.1
## V14 0.41 0.208 0.79 1.4
## V15 0.121 0.88 1.5
## V16 0.144 0.86 1.3
## V17 0.41 0.192 0.81 1.2
## V18 0.110 0.89 2.0
## V19 0.140 0.86 1.6
## V20 0.41 0.268 0.73 1.9
##
## MR1 MR2
## SS loadings 2.05 1.68
## Proportion Var 0.10 0.08
## Cumulative Var 0.10 0.19
## Proportion Explained 0.55 0.45
## Cumulative Proportion 0.55 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 190 with the objective function = 2.89 with Chi Square = 626.1
## df of the model are 151 and the objective function was 1
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.07
##
## The harmonic n.obs is 225 with the empirical chi square 148.71 with prob < 0.54
## The total n.obs was 225 with Likelihood Chi Square = 215.93 with prob < 0.00042
##
## Tucker Lewis Index of factoring reliability = 0.811
## RMSEA index = 0.043 and the 90 % confidence intervals are 0.03 0.056
## BIC = -601.9
## Fit based upon off diagonal values = 0.85
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.84 0.81
## Multiple R square of scores with factors 0.71 0.66
## Minimum correlation of possible factor scores 0.43 0.32
# 2.2. Oblique rotation (Oblimin)
oblique_result <- fa(data, nfactors = 5, rotate = "oblimin")
print(oblique_result, cut = 0.4)
## Factor Analysis using method = minres
## Call: fa(r = data, nfactors = 5, rotate = "oblimin")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR3 MR1 MR4 MR2 MR5 h2 u2 com
## V1 0.241 0.7593 2.7
## V2 0.142 0.8579 2.7
## V3 0.139 0.8615 4.1
## V4 0.071 0.9286 1.4
## V5 0.45 0.236 0.7639 1.2
## V6 0.220 0.7797 2.6
## V7 1.00 0.996 0.0039 1.0
## V8 0.45 0.287 0.7134 1.5
## V9 0.44 0.291 0.7088 1.7
## V10 0.084 0.9163 2.1
## V11 0.355 0.6453 2.4
## V12 0.337 0.6628 2.9
## V13 0.61 0.483 0.5174 1.3
## V14 0.270 0.7298 2.2
## V15 0.180 0.8205 2.3
## V16 0.235 0.7655 2.3
## V17 0.43 0.291 0.7090 1.8
## V18 0.55 0.292 0.7078 1.1
## V19 0.63 0.414 0.5862 1.1
## V20 0.55 0.414 0.5855 1.3
##
## MR3 MR1 MR4 MR2 MR5
## SS loadings 1.41 1.21 1.20 1.09 1.07
## Proportion Var 0.07 0.06 0.06 0.05 0.05
## Cumulative Var 0.07 0.13 0.19 0.25 0.30
## Proportion Explained 0.24 0.20 0.20 0.18 0.18
## Cumulative Proportion 0.24 0.44 0.64 0.82 1.00
##
## With factor correlations of
## MR3 MR1 MR4 MR2 MR5
## MR3 1.00 0.29 0.21 0.20 0.16
## MR1 0.29 1.00 0.25 0.18 0.27
## MR4 0.21 0.25 1.00 0.26 0.22
## MR2 0.20 0.18 0.26 1.00 -0.01
## MR5 0.16 0.27 0.22 -0.01 1.00
##
## Mean item complexity = 2
## Test of the hypothesis that 5 factors are sufficient.
##
## df null model = 190 with the objective function = 2.89 with Chi Square = 626.1
## df of the model are 100 and the objective function was 0.46
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic n.obs is 225 with the empirical chi square 55.5 with prob < 1
## The total n.obs was 225 with Likelihood Chi Square = 97.39 with prob < 0.56
##
## Tucker Lewis Index of factoring reliability = 1.012
## RMSEA index = 0 and the 90 % confidence intervals are 0 0.033
## BIC = -444.22
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy
## MR3 MR1 MR4 MR2 MR5
## Correlation of (regression) scores with factors 0.98 0.72 0.73 0.74 0.72
## Multiple R square of scores with factors 0.96 0.52 0.54 0.55 0.52
## Minimum correlation of possible factor scores 0.93 0.03 0.08 0.11 0.04