CFA Practice - From EFA Class Exercise Using Leanne Data

Author

Margaret Gatongi

Code
library(tidyverse) #all things data management
library(haven) #importing data from other packages
library(lavaan) #all things SEM
library(semPlot) #cool path diagrams for SEM models
library(dplyr)
Code
# Read the Stata file leanne.dta, then inspect the imported data with
# view() and a column-by-column glimpse().
leanne <- haven::read_dta(file = "leanne.dta")
tibble::view(leanne)
dplyr::glimpse(leanne)
Rows: 310
Columns: 35
$ Subid        <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
$ Sex          <dbl+lbl>  1,  1,  1,  1,  1,  1,  2,  1,  1,  1,  1,  1,  1,  …
$ Age          <dbl> 21, 19, 20, 24, 22, 18, 19, 19, 24, 19, 24, 22, 18, 18, 1…
$ ethnic_group <dbl+lbl> NA,  2,  2,  3,  2,  3,  4,  2,  3,  2, NA, NA, NA,  …
$ Ethnicity    <dbl+lbl> 8, 2, 2, 6, 5, 6, 4, 2, 6, 2, 8, 7, 7, 2, 6, 6, 4, 2,…
$ aspire01     <dbl> 9, 9, 7, 9, 8, 9, 9, 7, 7, 9, 9, 6, 9, 9, 8, 9, 7, 8, 9, …
$ aspire02     <dbl> 9, 9, 9, 9, 8, 9, 9, 7, 9, 9, 9, 9, 9, 3, 9, 9, 9, 9, 9, …
$ aspire03     <dbl> 8, 9, 8, 8, 6, 9, 8, 6, 6, 9, 7, 7, 9, 7, 7, 6, 7, 9, 4, …
$ aspire04     <dbl> 7, 6, 6, 6, 5, 9, 8, 3, 4, 5, 3, 5, 9, 3, 4, 8, 1, 5, 9, …
$ aspire05     <dbl> 7, 8, 6, 2, 6, 9, 7, 2, 3, 1, 5, 5, 9, 7, 2, 8, 4, 5, 3, …
$ aspire06     <dbl> 9, 9, 9, 8, 9, 9, 9, 9, 9, 5, 4, 7, 9, 7, 7, 9, 7, 7, 9, …
$ aspire07     <dbl> 9, 9, 9, 7, 9, 9, 8, 9, 9, 5, 9, 8, 9, 5, 9, 9, 8, 8, 9, …
$ aspire08     <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 6, 8, 9, 9, 9, 8, 6, 8, 5, …
$ aspire09     <dbl> 8, 8, 5, 8, 7, 9, 8, 9, 9, 9, 9, 8, 9, 9, 7, 7, 6, 8, 6, …
$ aspire10     <dbl> 6, 6, 5, 6, 5, 1, 7, 1, 1, 1, 2, 5, 9, 2, 4, 8, 1, 5, 9, …
$ aspire11     <dbl> 7, 6, 6, 6, 3, 1, 5, 2, 2, 1, 1, 4, 1, 2, 3, 9, 3, 7, 3, …
$ aspire12     <dbl> 8, 9, 9, 8, 9, 9, 9, 9, 9, 5, 4, 7, 9, 8, 7, 8, 8, 7, 9, …
$ aspire13     <dbl> 7, 9, 6, 8, 6, 9, 7, 6, 5, 9, 9, 7, 9, 8, 7, 9, 7, 8, 9, …
$ aspire14     <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 9, 9, 5, 8, 9, …
$ aspire15     <dbl> 9, 7, 5, 8, 7, 9, 7, 7, 9, 9, 9, 7, 9, 9, 8, 7, 3, 8, 4, …
$ aspire16     <dbl> 9, 7, 5, 6, 5, 1, 6, 5, 3, 1, 4, 4, 9, 2, 6, 8, 1, 5, 9, …
$ aspire17     <dbl> 9, 8, 4, 6, 3, 1, 4, 3, 3, 1, 5, 2, 1, 1, 4, 8, 4, 7, 3, …
$ aspire18     <dbl> 6, 7, 5, 4, 7, 1, 7, 4, 4, 1, 1, 2, 2, 1, 1, 9, 7, 5, 9, …
$ aspire19     <dbl> 9, 9, 7, 8, 9, 9, 9, 8, 8, 8, 9, 6, 9, 9, 9, 9, 9, -1, 9,…
$ aspire20     <dbl> 7, 8, 9, 9, 9, 9, 9, 8, 9, 9, 9, 8, 9, 2, 8, 9, 5, 8, 9, …
$ aspire21     <dbl> 8, 9, 7, 8, 7, 9, 7, 5, 9, 9, 9, 6, 9, 7, 7, 7, 8, 8, 7, …
$ aspire22     <dbl> 6, 3, 1, 3, 2, 1, 6, 4, 1, 1, 1, 1, 1, 1, 1, 8, 1, 5, 9, …
$ aspire23     <dbl> 6, 5, 2, 6, 2, 1, 6, 3, 4, 1, 4, 4, 1, 1, 1, 7, 5, 6, 4, …
$ aspire24     <dbl> 7, 8, 5, 8, 8, 1, 7, 8, 8, 1, 1, 2, 9, 2, 7, 9, 3, 7, 9, …
$ aspire25     <dbl> 9, 9, 6, 8, 8, 9, 9, 7, 9, 9, 9, 8, 9, 7, 8, 9, 6, 8, 7, …
$ aspire26     <dbl> 9, 7, 9, 9, 9, 9, 9, 8, 9, 8, 9, 9, 9, 7, 9, 9, 5, 8, 7, …
$ aspire27     <dbl> 8, 9, 6, 8, 7, 9, 7, 6, 9, 9, 9, 7, 9, 8, 8, 7, 8, 8, 6, …
$ aspire28     <dbl> 4, 1, 1, 3, 2, 1, 4, 2, 1, 1, 2, 1, 1, 1, 1, 6, 1, 5, 9, …
$ aspire29     <dbl> 5, 5, 1, 5, 2, 1, 5, 2, 1, 1, 7, 3, 1, 1, 1, 9, 1, 7, 9, …
$ aspire30     <dbl> 9, 9, 7, 8, 9, 1, 7, 9, 9, 1, 1, 6, 9, 5, 5, 5, 6, 7, 9, …
Code
# Drop Subid, Sex, Age, ethnic_group and Ethnicity so that only the aspire
# item columns remain, then inspect the result.
# NOTE(review): the glimpse() output shows a -1 in aspire19 while the other
# visible responses are 1-9; confirm whether -1 is a missing-data code that
# should be recoded to NA before modelling.
leanne.clean <- dplyr::select(
  leanne,
  -c(Subid, Sex, Age, ethnic_group, Ethnicity)
)
tibble::view(leanne.clean)
Code
# Double check that the five non-item columns were dropped and only the
# aspire items remain (no reverse coding is performed above).
dplyr::glimpse(leanne.clean)
Rows: 310
Columns: 30
$ aspire01 <dbl> 9, 9, 7, 9, 8, 9, 9, 7, 7, 9, 9, 6, 9, 9, 8, 9, 7, 8, 9, 8, 5…
$ aspire02 <dbl> 9, 9, 9, 9, 8, 9, 9, 7, 9, 9, 9, 9, 9, 3, 9, 9, 9, 9, 9, 9, 9…
$ aspire03 <dbl> 8, 9, 8, 8, 6, 9, 8, 6, 6, 9, 7, 7, 9, 7, 7, 6, 7, 9, 4, 9, 8…
$ aspire04 <dbl> 7, 6, 6, 6, 5, 9, 8, 3, 4, 5, 3, 5, 9, 3, 4, 8, 1, 5, 9, 5, 4…
$ aspire05 <dbl> 7, 8, 6, 2, 6, 9, 7, 2, 3, 1, 5, 5, 9, 7, 2, 8, 4, 5, 3, 2, 4…
$ aspire06 <dbl> 9, 9, 9, 8, 9, 9, 9, 9, 9, 5, 4, 7, 9, 7, 7, 9, 7, 7, 9, 8, 6…
$ aspire07 <dbl> 9, 9, 9, 7, 9, 9, 8, 9, 9, 5, 9, 8, 9, 5, 9, 9, 8, 8, 9, 9, 9…
$ aspire08 <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 6, 8, 9, 9, 9, 8, 6, 8, 5, 9, 9…
$ aspire09 <dbl> 8, 8, 5, 8, 7, 9, 8, 9, 9, 9, 9, 8, 9, 9, 7, 7, 6, 8, 6, 7, 8…
$ aspire10 <dbl> 6, 6, 5, 6, 5, 1, 7, 1, 1, 1, 2, 5, 9, 2, 4, 8, 1, 5, 9, 4, 5…
$ aspire11 <dbl> 7, 6, 6, 6, 3, 1, 5, 2, 2, 1, 1, 4, 1, 2, 3, 9, 3, 7, 3, 1, 3…
$ aspire12 <dbl> 8, 9, 9, 8, 9, 9, 9, 9, 9, 5, 4, 7, 9, 8, 7, 8, 8, 7, 9, 7, 6…
$ aspire13 <dbl> 7, 9, 6, 8, 6, 9, 7, 6, 5, 9, 9, 7, 9, 8, 7, 9, 7, 8, 9, 9, 8…
$ aspire14 <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 9, 9, 5, 8, 9, 8, 9…
$ aspire15 <dbl> 9, 7, 5, 8, 7, 9, 7, 7, 9, 9, 9, 7, 9, 9, 8, 7, 3, 8, 4, 6, 8…
$ aspire16 <dbl> 9, 7, 5, 6, 5, 1, 6, 5, 3, 1, 4, 4, 9, 2, 6, 8, 1, 5, 9, 4, 6…
$ aspire17 <dbl> 9, 8, 4, 6, 3, 1, 4, 3, 3, 1, 5, 2, 1, 1, 4, 8, 4, 7, 3, 2, 3…
$ aspire18 <dbl> 6, 7, 5, 4, 7, 1, 7, 4, 4, 1, 1, 2, 2, 1, 1, 9, 7, 5, 9, 2, 3…
$ aspire19 <dbl> 9, 9, 7, 8, 9, 9, 9, 8, 8, 8, 9, 6, 9, 9, 9, 9, 9, -1, 9, 9, …
$ aspire20 <dbl> 7, 8, 9, 9, 9, 9, 9, 8, 9, 9, 9, 8, 9, 2, 8, 9, 5, 8, 9, 9, 9…
$ aspire21 <dbl> 8, 9, 7, 8, 7, 9, 7, 5, 9, 9, 9, 6, 9, 7, 7, 7, 8, 8, 7, 7, 8…
$ aspire22 <dbl> 6, 3, 1, 3, 2, 1, 6, 4, 1, 1, 1, 1, 1, 1, 1, 8, 1, 5, 9, 3, 3…
$ aspire23 <dbl> 6, 5, 2, 6, 2, 1, 6, 3, 4, 1, 4, 4, 1, 1, 1, 7, 5, 6, 4, 2, 3…
$ aspire24 <dbl> 7, 8, 5, 8, 8, 1, 7, 8, 8, 1, 1, 2, 9, 2, 7, 9, 3, 7, 9, 3, 3…
$ aspire25 <dbl> 9, 9, 6, 8, 8, 9, 9, 7, 9, 9, 9, 8, 9, 7, 8, 9, 6, 8, 7, 8, 8…
$ aspire26 <dbl> 9, 7, 9, 9, 9, 9, 9, 8, 9, 8, 9, 9, 9, 7, 9, 9, 5, 8, 7, 9, 9…
$ aspire27 <dbl> 8, 9, 6, 8, 7, 9, 7, 6, 9, 9, 9, 7, 9, 8, 8, 7, 8, 8, 6, 7, 9…
$ aspire28 <dbl> 4, 1, 1, 3, 2, 1, 4, 2, 1, 1, 2, 1, 1, 1, 1, 6, 1, 5, 9, 3, 3…
$ aspire29 <dbl> 5, 5, 1, 5, 2, 1, 5, 2, 1, 1, 7, 3, 1, 1, 1, 9, 1, 7, 9, 2, 5…
$ aspire30 <dbl> 9, 9, 7, 8, 9, 1, 7, 9, 9, 1, 1, 6, 9, 5, 5, 5, 6, 7, 9, 4, 3…

From Promax results:

#PA1=Fame; PA2=Altruism; PA3=Finances; PA4=Relationships (loadings less than 0.5??)

#fame_cfa1 <- 'fame =~ NA*aspire04 + aspire05 + aspire10 + aspire11 + aspire16 + aspire17 + aspire18 + aspire22 + aspire23 + aspire28 + aspire29
#              fame ~~ 1*fame'
#fame_cfa1_model <- cfa(fame_cfa1, data=leanne.clean)
#summary(fame_cfa1_model)

Code
# Specify, estimate and summarise a one-factor CFA for the "fame" subscale.
# The model syntax is handed to lavaan as a quoted string (single quotation
# marks at the start and end) so that lavaan can parse and estimate it.
fame_cfa1 <- 'fame =~ aspire04 + aspire05 + aspire10 + aspire11 + aspire16 + aspire17 + aspire18 + aspire22 + aspire23 + aspire28 + aspire29'
fame_cfa1_model <- lavaan::cfa(
  model = fame_cfa1,
  data = leanne.clean
)
summary(object = fame_cfa1_model)
lavaan 0.6.17 ended normally after 30 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        22

  Number of observations                           310

Model Test User Model:
                                                      
  Test statistic                               515.306
  Degrees of freedom                                44
  P-value (Chi-square)                           0.000

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)
  fame =~                                             
    aspire04          1.000                           
    aspire05          1.027    0.131    7.818    0.000
    aspire10          1.325    0.140    9.464    0.000
    aspire11          1.465    0.146   10.025    0.000
    aspire16          1.240    0.138    9.017    0.000
    aspire17          1.378    0.147    9.365    0.000
    aspire18          1.313    0.143    9.198    0.000
    aspire22          1.204    0.127    9.501    0.000
    aspire23          1.380    0.145    9.504    0.000
    aspire28          1.009    0.112    8.999    0.000
    aspire29          1.303    0.144    9.057    0.000

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)
   .aspire04          3.534    0.297   11.903    0.000
   .aspire05          4.031    0.337   11.945    0.000
   .aspire10          2.569    0.231   11.125    0.000
   .aspire11          1.949    0.189   10.309    0.000
   .aspire16          3.047    0.266   11.472    0.000
   .aspire17          2.986    0.266   11.217    0.000
   .aspire18          3.040    0.268   11.350    0.000
   .aspire22          2.064    0.186   11.088    0.000
   .aspire23          2.708    0.244   11.085    0.000
   .aspire28          2.038    0.177   11.483    0.000
   .aspire29          3.280    0.287   11.447    0.000
    fame              1.604    0.312    5.142    0.000
Code
# One-factor CFA for the "altruism" subscale: specify the measurement model,
# fit it to the item data, and print the lavaan summary.
altruism_cfa1 <- 'altruism =~ aspire03 + aspire09 + aspire15 + aspire21 + aspire25 + aspire27'
altruism_cfa1_model <- lavaan::cfa(
  model = altruism_cfa1,
  data = leanne.clean
)
summary(object = altruism_cfa1_model)
lavaan 0.6.17 ended normally after 26 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        12

  Number of observations                           310

Model Test User Model:
                                                      
  Test statistic                                77.859
  Degrees of freedom                                 9
  P-value (Chi-square)                           0.000

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)
  altruism =~                                         
    aspire03          1.000                           
    aspire09          1.393    0.151    9.206    0.000
    aspire15          1.588    0.166    9.572    0.000
    aspire21          1.530    0.149   10.235    0.000
    aspire25          0.767    0.105    7.295    0.000
    aspire27          1.556    0.152   10.215    0.000

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)
   .aspire03          1.343    0.116   11.610    0.000
   .aspire09          1.390    0.128   10.858    0.000
   .aspire15          1.399    0.135   10.380    0.000
   .aspire21          0.687    0.081    8.468    0.000
   .aspire25          1.161    0.098   11.881    0.000
   .aspire27          0.729    0.085    8.567    0.000
    altruism          0.656    0.126    5.206    0.000
Code
# One-factor CFA for the "finances" subscale: specify the measurement model,
# fit it to the item data, and print the lavaan summary.
finances_cfa1 <- 'finances =~ aspire06 + aspire12 + aspire24 + aspire30'
finances_cfa1_model <- lavaan::cfa(
  model = finances_cfa1,
  data = leanne.clean
)
summary(object = finances_cfa1_model)
lavaan 0.6.17 ended normally after 24 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                         8

  Number of observations                           310

Model Test User Model:
                                                      
  Test statistic                                76.670
  Degrees of freedom                                 2
  P-value (Chi-square)                           0.000

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)
  finances =~                                         
    aspire06          1.000                           
    aspire12          1.125    0.074   15.295    0.000
    aspire24          1.239    0.105   11.798    0.000
    aspire30          1.103    0.104   10.561    0.000

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)
   .aspire06          0.637    0.092    6.930    0.000
   .aspire12          0.691    0.111    6.223    0.000
   .aspire24          3.307    0.300   11.019    0.000
   .aspire30          3.559    0.312   11.408    0.000
    finances          1.589    0.188    8.471    0.000
Code
# One-factor CFA for the "relationships" subscale: specify the measurement
# model, fit it to the item data, and print the lavaan summary.
relationships_cfa1 <- 'relationships =~ aspire02 + aspire07 + aspire08 + aspire14 + aspire20 + aspire26'
relationships_cfa1_model <- lavaan::cfa(
  model = relationships_cfa1,
  data = leanne.clean
)
summary(object = relationships_cfa1_model)
lavaan 0.6.17 ended normally after 26 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        12

  Number of observations                           310

Model Test User Model:
                                                      
  Test statistic                                52.636
  Degrees of freedom                                 9
  P-value (Chi-square)                           0.000

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)
  relationships =~                                    
    aspire02          1.000                           
    aspire07          0.767    0.124    6.188    0.000
    aspire08          0.733    0.103    7.108    0.000
    aspire14          1.276    0.122   10.452    0.000
    aspire20          1.204    0.127    9.450    0.000
    aspire26          1.106    0.122    9.066    0.000

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)
   .aspire02          0.807    0.077   10.477    0.000
   .aspire07          1.674    0.141   11.909    0.000
   .aspire08          1.038    0.089   11.649    0.000
   .aspire14          0.482    0.068    7.109    0.000
   .aspire20          0.980    0.097   10.072    0.000
   .aspire26          1.005    0.096   10.514    0.000
    relationships     0.565    0.098    5.746    0.000

#Each model converged normally after 24–30 iterations (see the table below), so a stable solution was reached.

#ML (maximum likelihood, great)

#Compare no. of parameters and no. of data points: number of data points = p(p+1) / 2, where p = number of observed variables. The number of free parameters cannot exceed the number of data points; if it does, the model is under-identified and cannot be estimated using SEM

#Discrepancy in calculation compared to lecture ??? - from minute 12:48

#what is the optimal number of iterations in determining model convergence?

#Items with high residual variances (the entries prefixed with "." under "Variances:") are not strong contributors to the latent factors

Latent Variable No. of Free Parameters No. of data points No. of Iterations Chi-square test stat
Fame 22 (23?) 66 30 515.306
Altruism 12 (13?) 21 26 77.859
Finances 8 (9?) 10 24 76.670
Relationships 12 (13?) 21 26 52.636

#Not covered in class, ask about or try fixing loading to 1

By default, lavaan gives each latent variable its scale by fixing the loading of its first item to 1 (also known as the marker method). Other options are possible (for example, fixing the variance of the latent variable to 1). To free a parameter, put NA* in front of it; to fix a parameter to 1, put 1* in front of it. For example, the syntax NA*aspire04 would free the loading of the first fame item (which the marker method fixes to 1 by default), and fame ~~ 1*fame would fix the variance of the latent factor to 1.

#Visualize it with the semPaths function from the semPlot package

Code
# Path diagram of the fitted fame model, with "est" as the edge labels.
semPlot::semPaths(
  object = fame_cfa1_model,
  whatLabels = "est"
)

Code
# Path diagram of the fitted altruism model, with "est" as the edge labels.
semPlot::semPaths(
  object = altruism_cfa1_model,
  whatLabels = "est"
)

Code
# Path diagram of the fitted finances model, with "est" as the edge labels.
semPlot::semPaths(
  object = finances_cfa1_model,
  whatLabels = "est"
)

Code
# Path diagram of the fitted relationships model, with "est" as the edge labels.
semPlot::semPaths(
  object = relationships_cfa1_model,
  whatLabels = "est"
)