library(lavaan)
## Warning: package 'lavaan' was built under R version 4.1.2
## This is lavaan 0.6-9
## lavaan is FREE software! Please report any bugs.
#?HolzingerSwineford1939
#View(HolzingerSwineford1939)
data(HolzingerSwineford1939)
# specify the model
HS.model <- ' visual  =~ x1 + x2 + x3      
              textual =~ x4 + x5 + x6
              speed   =~ x7 + x8 + x9 '

# fit the model
fit <- cfa(HS.model, data = HolzingerSwineford1939)

# display summary output
summary(fit, fit.measures = TRUE)
## lavaan 0.6-9 ended normally after 35 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        21
##                                                       
##   Number of observations                           301
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                85.306
##   Degrees of freedom                                24
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               918.852
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.931
##   Tucker-Lewis Index (TLI)                       0.896
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3737.745
##   Loglikelihood unrestricted model (H1)      -3695.092
##                                                       
##   Akaike (AIC)                                7517.490
##   Bayesian (BIC)                              7595.339
##   Sample-size adjusted Bayesian (BIC)         7528.739
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.092
##   90 Percent confidence interval - lower         0.071
##   90 Percent confidence interval - upper         0.114
##   P-value RMSEA <= 0.05                          0.001
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.065
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual =~                                           
##     x1                1.000                           
##     x2                0.554    0.100    5.554    0.000
##     x3                0.729    0.109    6.685    0.000
##   textual =~                                          
##     x4                1.000                           
##     x5                1.113    0.065   17.014    0.000
##     x6                0.926    0.055   16.703    0.000
##   speed =~                                            
##     x7                1.000                           
##     x8                1.180    0.165    7.152    0.000
##     x9                1.082    0.151    7.155    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual ~~                                           
##     textual           0.408    0.074    5.552    0.000
##     speed             0.262    0.056    4.660    0.000
##   textual ~~                                          
##     speed             0.173    0.049    3.518    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .x1                0.549    0.114    4.833    0.000
##    .x2                1.134    0.102   11.146    0.000
##    .x3                0.844    0.091    9.317    0.000
##    .x4                0.371    0.048    7.779    0.000
##    .x5                0.446    0.058    7.642    0.000
##    .x6                0.356    0.043    8.277    0.000
##    .x7                0.799    0.081    9.823    0.000
##    .x8                0.488    0.074    6.573    0.000
##    .x9                0.566    0.071    8.003    0.000
##     visual            0.809    0.145    5.564    0.000
##     textual           0.979    0.112    8.737    0.000
##     speed             0.384    0.086    4.451    0.000
head(lavPredict(fit))
##           visual     textual       speed
## [1,] -0.81767524 -0.13754501  0.06150726
## [2,]  0.04951940 -1.01272402  0.62549360
## [3,] -0.76139670 -1.87228634 -0.84057276
## [4,]  0.41934153  0.01848569 -0.27133710
## [5,] -0.41590481 -0.12225009  0.19432951
## [6,]  0.02325632 -1.32981727  0.70885348
head(lavPredict(fit, type = "ov"))
##            x1       x2       x3       x4       x5        x6       x7       x8
## [1,] 4.118094 5.635456 1.654027 2.923363 4.187433 2.0581851 4.247409 5.599652
## [2,] 4.985289 6.115449 2.286533 2.048184 3.213292 1.2476414 4.811396 6.265128
## [3,] 4.174373 5.666607 1.695075 1.188622 2.256533 0.4515610 3.345329 4.535242
## [4,] 5.355111 6.320146 2.556271 3.079394 4.361108 2.2026924 3.914565 5.206912
## [5,] 4.519865 5.857836 1.947067 2.938658 4.204458 2.0723504 4.380232 5.756376
## [6,] 4.959026 6.100912 2.267378 1.731091 2.860343 0.9539666 4.894756 6.363489
##            x9
## [1,] 5.440645
## [2,] 6.050613
## [3,] 4.465019
## [4,] 5.080664
## [5,] 5.584297
## [6,] 6.140770
## merge factor scores to original data.frame
idx <- lavInspect(fit, "case.idx")
fscores <- lavPredict(fit)
## loop over factors
for (fs in colnames(fscores)) {
  HolzingerSwineford1939[idx, fs] <- fscores[ , fs]
}
head(HolzingerSwineford1939)
##   id sex ageyr agemo  school grade       x1   x2    x3       x4   x5        x6
## 1  1   1    13     1 Pasteur     7 3.333333 7.75 0.375 2.333333 5.75 1.2857143
## 2  2   2    13     7 Pasteur     7 5.333333 5.25 2.125 1.666667 3.00 1.2857143
## 3  3   2    13     1 Pasteur     7 4.500000 5.25 1.875 1.000000 1.75 0.4285714
## 4  4   1    13     2 Pasteur     7 5.333333 7.75 3.000 2.666667 4.50 2.4285714
## 5  5   2    12     2 Pasteur     7 4.833333 4.75 0.875 2.666667 4.00 2.5714286
## 6  6   2    14     1 Pasteur     7 5.333333 5.00 2.250 1.000000 3.00 0.8571429
##         x7   x8       x9      visual     textual       speed
## 1 3.391304 5.75 6.361111 -0.81767524 -0.13754501  0.06150726
## 2 3.782609 6.25 7.916667  0.04951940 -1.01272402  0.62549360
## 3 3.260870 3.90 4.416667 -0.76139670 -1.87228634 -0.84057276
## 4 3.000000 5.30 4.861111  0.41934153  0.01848569 -0.27133710
## 5 3.695652 6.30 5.916667 -0.41590481 -0.12225009  0.19432951
## 6 4.347826 6.65 7.500000  0.02325632 -1.32981727  0.70885348
#install.packages("semPlot")
library(semPlot)
## Warning: package 'semPlot' was built under R version 4.1.2
semPaths(fit, "model", "estimates")

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
## Warning: package 'psych' was built under R version 4.1.2
## 
## Attaching package: 'psych'
## The following object is masked from 'package:lavaan':
## 
##     cor2cov
test<-select(HolzingerSwineford1939, x1:x9)
lowerCor(test)
##    x1    x2    x3    x4    x5    x6    x7    x8    x9   
## x1  1.00                                                
## x2  0.30  1.00                                          
## x3  0.44  0.34  1.00                                    
## x4  0.37  0.15  0.16  1.00                              
## x5  0.29  0.14  0.08  0.73  1.00                        
## x6  0.36  0.19  0.20  0.70  0.72  1.00                  
## x7  0.07 -0.08  0.07  0.17  0.10  0.12  1.00            
## x8  0.22  0.09  0.19  0.11  0.14  0.15  0.49  1.00      
## x9  0.39  0.21  0.33  0.21  0.23  0.21  0.34  0.45  1.00
pairs.panels(test,pch='.')

cor.plot(test,numbers=TRUE,main="9 variables from Test Score")

uls <- fa(test,3,rotate="varimax")
print(uls)
## Factor Analysis using method =  minres
## Call: fa(r = test, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##     MR1   MR3   MR2   h2   u2 com
## x1 0.28  0.61  0.15 0.48 0.52 1.5
## x2 0.10  0.49 -0.03 0.26 0.74 1.1
## x3 0.04  0.66  0.13 0.45 0.55 1.1
## x4 0.83  0.16  0.10 0.73 0.27 1.1
## x5 0.86  0.09  0.09 0.75 0.25 1.0
## x6 0.80  0.21  0.09 0.69 0.31 1.2
## x7 0.09 -0.08  0.71 0.52 0.48 1.1
## x8 0.05  0.17  0.70 0.52 0.48 1.1
## x9 0.13  0.41  0.52 0.46 0.54 2.0
## 
##                        MR1  MR3  MR2
## SS loadings           2.19 1.34 1.33
## Proportion Var        0.24 0.15 0.15
## Cumulative Var        0.24 0.39 0.54
## Proportion Explained  0.45 0.28 0.27
## Cumulative Proportion 0.45 0.73 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  3.05 with Chi Square of  904.1
## The degrees of freedom for the model are 12  and the objective function was  0.08 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.03 
## 
## The harmonic number of observations is  301 with the empirical chi square  7.87  with prob <  0.8 
## The total number of observations was  301  with Likelihood Chi Square =  22.56  with prob <  0.032 
## 
## Tucker Lewis Index of factoring reliability =  0.963
## RMSEA index =  0.054  and the 90 % confidence intervals are  0.016 0.088
## BIC =  -45.93
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1  MR3  MR2
## Correlation of (regression) scores with factors   0.93 0.81 0.84
## Multiple R square of scores with factors          0.87 0.66 0.70
## Minimum correlation of possible factor scores     0.74 0.32 0.41
fa.diagram(uls,digits = 2,main="Test Score Factors")

plot(uls)

#add factor scores
f <- factanal(test, factors=3, rotation="promax", scores="regression")
data <- cbind(test, f$scores)  #add two variables
#another solution
fs <- factor.scores(test,uls)     #obtain factor scores
fs <- fs$scores                   #get the columns of factor scores for each case
bfi <- cbind(test,fs)              #append factor scores to dataset