# Course: EDF 6436 - Theory of Measurement
# Assignment: EFA
# Author: Nguyet Hoang | hoangt@ufl.edu

# Set up R
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(conflicted)
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package.
library(psych)
library(scales)
library(GPArotation)

# Read data
data <- read.csv("ToMassignment1data.csv", sep = ";", header = TRUE)
view(data)

# Univariate descriptives
describe(data)
##     vars   n mean   sd median trimmed mad min max range  skew kurtosis   se
## V1     1 225 0.67 0.47      1    0.71   0   0   1     1 -0.72    -1.48 0.03
## V2     2 225 0.66 0.47      1    0.70   0   0   1     1 -0.68    -1.54 0.03
## V3     3 225 0.46 0.50      0    0.45   0   0   1     1  0.17    -1.98 0.03
## V4     4 225 0.39 0.49      0    0.36   0   0   1     1  0.46    -1.79 0.03
## V5     5 225 0.48 0.50      0    0.48   0   0   1     1  0.06    -2.01 0.03
## V6     6 225 0.54 0.50      1    0.55   0   0   1     1 -0.15    -1.99 0.03
## V7     7 225 0.74 0.44      1    0.80   0   0   1     1 -1.10    -0.79 0.03
## V8     8 225 0.78 0.42      1    0.85   0   0   1     1 -1.33    -0.24 0.03
## V9     9 225 0.36 0.48      0    0.32   0   0   1     1  0.60    -1.65 0.03
## V10   10 225 0.28 0.45      0    0.23   0   0   1     1  0.97    -1.06 0.03
## V11   11 225 0.70 0.46      1    0.75   0   0   1     1 -0.88    -1.23 0.03
## V12   12 225 0.77 0.42      1    0.83   0   0   1     1 -1.27    -0.40 0.03
## V13   13 225 0.71 0.45      1    0.76   0   0   1     1 -0.93    -1.15 0.03
## V14   14 225 0.48 0.50      0    0.48   0   0   1     1  0.06    -2.01 0.03
## V15   15 225 0.72 0.45      1    0.78   0   0   1     1 -1.00    -1.01 0.03
## V16   16 225 0.70 0.46      1    0.75   0   0   1     1 -0.88    -1.23 0.03
## V17   17 225 0.41 0.49      0    0.39   0   0   1     1  0.35    -1.89 0.03
## V18   18 225 0.37 0.48      0    0.34   0   0   1     1  0.52    -1.74 0.03
## V19   19 225 0.61 0.49      1    0.64   0   0   1     1 -0.44    -1.81 0.03
## V20   20 225 0.47 0.50      0    0.46   0   0   1     1  0.11    -2.00 0.03
# Step 1: Extract factors - report unrotated number of factors
# 1.1. Using principal components method for factor extraction
pc_result <- fa.parallel(data, fa = "pc")

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  2
pc_actual_eigenvalues <- pc_result$pc.values
pc_simulated_eigenvalues <- pc_result$pc.sim
pc_eigenvalues_table <- data.frame(
  Component = 1:length(pc_actual_eigenvalues),
  Actual_Eigenvalue = pc_actual_eigenvalues,
  Simulated_Eigenvalue = pc_simulated_eigenvalues
)
pc_eigenvalues_table
##    Component Actual_Eigenvalue Simulated_Eigenvalue
## 1          1         3.5801707            1.5358605
## 2          2         1.6415253            1.4467587
## 3          3         1.3511136            1.3809128
## 4          4         1.2953122            1.3119726
## 5          5         1.2294961            1.2525381
## 6          6         1.1298351            1.1927684
## 7          7         0.9990179            1.1376473
## 8          8         0.9335611            1.1000611
## 9          9         0.9053272            1.0548242
## 10        10         0.8604354            1.0026892
## 11        11         0.8349938            0.9516663
## 12        12         0.7595781            0.9110127
## 13        13         0.7200692            0.8685861
## 14        14         0.6714808            0.8264659
## 15        15         0.6231413            0.7850590
## 16        16         0.6005184            0.7384335
## 17        17         0.5080066            0.7036627
## 18        18         0.5000640            0.6523678
## 19        19         0.4332614            0.5997090
## 20        20         0.4230918            0.5470041
# With the principal components method, it suggests that the number of factors = 2
# 1.2. Using common factors model for factor extraction
cf_result <- fa.parallel(data, fa = "fa")

## Parallel analysis suggests that the number of factors =  5  and the number of components =  NA
cf_actual_eigenvalues <- cf_result$fa.values
cf_simulated_eigenvalues <- cf_result$fa.sim
cf_eigenvalues_table <- data.frame(
  Factor = 1:length(cf_actual_eigenvalues),
  Actual_Eigenvalue = cf_actual_eigenvalues,
  Simulated_Eigenvalue = cf_simulated_eigenvalues
)
cf_eigenvalues_table
##    Factor Actual_Eigenvalue Simulated_Eigenvalue
## 1       1        2.74870239           0.67810521
## 2       2        0.75848928           0.49904550
## 3       3        0.47503262           0.40609877
## 4       4        0.44104278           0.33722686
## 5       5        0.38349150           0.27024999
## 6       6        0.23957800           0.22145875
## 7       7        0.15569880           0.17519620
## 8       8        0.06725277           0.11803778
## 9       9        0.04572602           0.07200855
## 10     10       -0.03052189           0.02290730
## 11     11       -0.07657446          -0.02043572
## 12     12       -0.10579409          -0.06817762
## 13     13       -0.14273082          -0.10749052
## 14     14       -0.19914475          -0.14947462
## 15     15       -0.22654977          -0.18944912
## 16     16       -0.27398741          -0.23047638
## 17     17       -0.32591266          -0.27221873
## 18     18       -0.36467356          -0.31676497
## 19     19       -0.38546137          -0.35625797
## 20     20       -0.43496080          -0.41148512
# With the common factors model, it suggests that the number of factors = 5

# Step 2: Rotating the factors
# 2.1. Orthogonal rotation (Varimax)
ortho_result <- fa(data, nfactors = 2, rotate = "varimax")
print(ortho_result, cut = 0.4)
## Factor Analysis using method =  minres
## Call: fa(r = data, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       MR1   MR2    h2   u2 com
## V1              0.160 0.84 2.0
## V2              0.105 0.90 1.1
## V3              0.141 0.86 1.8
## V4              0.026 0.97 1.0
## V5              0.146 0.85 1.1
## V6              0.214 0.79 1.8
## V7   0.42       0.264 0.74 1.8
## V8   0.42       0.190 0.81 1.2
## V9         0.52 0.273 0.73 1.0
## V10             0.024 0.98 1.8
## V11        0.59 0.348 0.65 1.0
## V12  0.41       0.179 0.82 1.2
## V13  0.67       0.479 0.52 1.1
## V14  0.41       0.208 0.79 1.4
## V15             0.121 0.88 1.5
## V16             0.144 0.86 1.3
## V17  0.41       0.192 0.81 1.2
## V18             0.110 0.89 2.0
## V19             0.140 0.86 1.6
## V20  0.41       0.268 0.73 1.9
## 
##                        MR1  MR2
## SS loadings           2.05 1.68
## Proportion Var        0.10 0.08
## Cumulative Var        0.10 0.19
## Proportion Explained  0.55 0.45
## Cumulative Proportion 0.55 1.00
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  190  with the objective function =  2.89 with Chi Square =  626.1
## df of  the model are 151  and the objective function was  1 
## 
## The root mean square of the residuals (RMSR) is  0.06 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic n.obs is  225 with the empirical chi square  148.71  with prob <  0.54 
## The total n.obs was  225  with Likelihood Chi Square =  215.93  with prob <  0.00042 
## 
## Tucker Lewis Index of factoring reliability =  0.811
## RMSEA index =  0.043  and the 90 % confidence intervals are  0.03 0.056
## BIC =  -601.9
## Fit based upon off diagonal values = 0.85
## Measures of factor score adequacy             
##                                                    MR1  MR2
## Correlation of (regression) scores with factors   0.84 0.81
## Multiple R square of scores with factors          0.71 0.66
## Minimum correlation of possible factor scores     0.43 0.32
# 2.2. Oblique rotation (Oblimin)
oblique_result <- fa(data, nfactors = 5, rotate = "oblimin")
print(oblique_result, cut = 0.4)
## Factor Analysis using method =  minres
## Call: fa(r = data, nfactors = 5, rotate = "oblimin")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       MR3   MR1   MR4   MR2   MR5    h2     u2 com
## V1                                0.241 0.7593 2.7
## V2                                0.142 0.8579 2.7
## V3                                0.139 0.8615 4.1
## V4                                0.071 0.9286 1.4
## V5                           0.45 0.236 0.7639 1.2
## V6                                0.220 0.7797 2.6
## V7   1.00                         0.996 0.0039 1.0
## V8               0.45             0.287 0.7134 1.5
## V9                           0.44 0.291 0.7088 1.7
## V10                               0.084 0.9163 2.1
## V11                               0.355 0.6453 2.4
## V12                               0.337 0.6628 2.9
## V13                    0.61       0.483 0.5174 1.3
## V14                               0.270 0.7298 2.2
## V15                               0.180 0.8205 2.3
## V16                               0.235 0.7655 2.3
## V17                    0.43       0.291 0.7090 1.8
## V18        0.55                   0.292 0.7078 1.1
## V19              0.63             0.414 0.5862 1.1
## V20        0.55                   0.414 0.5855 1.3
## 
##                        MR3  MR1  MR4  MR2  MR5
## SS loadings           1.41 1.21 1.20 1.09 1.07
## Proportion Var        0.07 0.06 0.06 0.05 0.05
## Cumulative Var        0.07 0.13 0.19 0.25 0.30
## Proportion Explained  0.24 0.20 0.20 0.18 0.18
## Cumulative Proportion 0.24 0.44 0.64 0.82 1.00
## 
##  With factor correlations of 
##      MR3  MR1  MR4   MR2   MR5
## MR3 1.00 0.29 0.21  0.20  0.16
## MR1 0.29 1.00 0.25  0.18  0.27
## MR4 0.21 0.25 1.00  0.26  0.22
## MR2 0.20 0.18 0.26  1.00 -0.01
## MR5 0.16 0.27 0.22 -0.01  1.00
## 
## Mean item complexity =  2
## Test of the hypothesis that 5 factors are sufficient.
## 
## df null model =  190  with the objective function =  2.89 with Chi Square =  626.1
## df of  the model are 100  and the objective function was  0.46 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic n.obs is  225 with the empirical chi square  55.5  with prob <  1 
## The total n.obs was  225  with Likelihood Chi Square =  97.39  with prob <  0.56 
## 
## Tucker Lewis Index of factoring reliability =  1.012
## RMSEA index =  0  and the 90 % confidence intervals are  0 0.033
## BIC =  -444.22
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy             
##                                                    MR3  MR1  MR4  MR2  MR5
## Correlation of (regression) scores with factors   0.98 0.72 0.73 0.74 0.72
## Multiple R square of scores with factors          0.96 0.52 0.54 0.55 0.52
## Minimum correlation of possible factor scores     0.93 0.03 0.08 0.11 0.04