0. Tải gói lệnh cần thiết

require(lavaan)
require(tidyverse)
library(semPlot)

1.Đọc dữ liệu và giới thiệu dữ liệu

Bộ số liệu HolzingerSwineford1939 có sẵn trong gói lavaan

#?HolzingerSwineford1939
str(HolzingerSwineford1939)

## 'data.frame':    301 obs. of  15 variables:
##  $ id    : int  1 2 3 4 5 6 7 8 9 11 ...
##  $ sex   : int  1 2 2 1 2 2 1 2 2 2 ...
##  $ ageyr : int  13 13 13 13 12 14 12 12 13 12 ...
##  $ agemo : int  1 7 1 2 2 1 1 2 0 5 ...
##  $ school: Factor w/ 2 levels "Grant-White",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ grade : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ x1    : num  3.33 5.33 4.5 5.33 4.83 ...
##  $ x2    : num  7.75 5.25 5.25 7.75 4.75 5 6 6.25 5.75 5.25 ...
##  $ x3    : num  0.375 2.125 1.875 3 0.875 ...
##  $ x4    : num  2.33 1.67 1 2.67 2.67 ...
##  $ x5    : num  5.75 3 1.75 4.5 4 3 6 4.25 5.75 5 ...
##  $ x6    : num  1.286 1.286 0.429 2.429 2.571 ...
##  $ x7    : num  3.39 3.78 3.26 3 3.7 ...
##  $ x8    : num  5.75 6.25 3.9 5.3 6.3 6.65 6.2 5.15 4.65 4.55 ...
##  $ x9    : num  6.36 7.92 4.42 4.86 5.92 ...

d <- HolzingerSwineford1939
d1 <- HolzingerSwineford1939 %>% select(x1:x9)

Nguồn dữ liệu gốc

MBESS package (HS)

#?MBESS
require(MBESS)
data(HS)
str(HS)

## 'data.frame':    301 obs. of  34 variables:
##  $ id                              : int  1 2 3 4 5 6 7 8 9 11 ...
##  $ sex                             : Factor w/ 2 levels "Female","Male": 2 1 1 2 1 1 2 1 1 1 ...
##  $ grade                           : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ age                             : int  13 13 13 13 12 14 12 12 13 12 ...
##  $ month_since_birthday            : int  1 7 1 2 2 1 1 2 0 5 ...
##  $ age_months                      : int  157 163 157 158 146 169 145 146 156 149 ...
##  $ age_years                       : num  13.1 13.6 13.1 13.2 12.2 ...
##  $ school                          : Factor w/ 2 levels "Grant-White",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ t1_visual_perception            : int  20 32 27 32 29 32 17 34 27 21 ...
##  $ t2_cubes                        : int  31 21 21 31 19 20 24 25 23 21 ...
##  $ t3_paper_form_board             : int  12 12 12 16 12 11 12 13 11 10 ...
##  $ t4_lozenges                     : int  3 17 15 24 7 18 8 15 12 6 ...
##  $ t5_general_information          : int  40 34 20 42 37 31 40 29 29 33 ...
##  $ t6_paragraph_comprehension      : int  7 5 3 8 8 3 10 11 8 8 ...
##  $ t7_sentence                     : int  23 12 7 18 16 12 24 17 23 20 ...
##  $ t8_word_classification          : int  22 22 12 21 25 25 32 25 19 25 ...
##  $ t9_word_meaning                 : int  9 9 3 17 18 6 20 9 19 18 ...
##  $ t10_addition                    : int  78 87 75 69 85 100 108 78 104 95 ...
##  $ t11_code                        : int  74 84 49 65 63 92 65 80 52 74 ...
##  $ t12_counting_groups_of_dots     : int  115 125 78 106 126 133 124 103 93 91 ...
##  $ t13_straight_and_curved_capitals: int  229 285 159 175 213 270 175 132 265 157 ...
##  $ t14_word_recognition            : int  170 184 170 181 187 164 121 184 184 175 ...
##  $ t15_number_recognition          : int  86 85 85 80 99 84 71 95 91 92 ...
##  $ t16_figure_recognition          : int  96 100 95 91 104 104 78 106 105 100 ...
##  $ t17_object_number               : int  6 12 1 5 15 6 4 11 18 5 ...
##  $ t18_number_figure               : int  9 12 5 3 14 6 3 13 6 8 ...
##  $ t19_figure_word                 : int  16 10 6 10 14 14 5 9 11 11 ...
##  $ t20_deduction                   : int  3 -3 -3 -2 29 9 18 15 12 33 ...
##  $ t21_numerical_puzzles           : int  14 13 9 10 15 2 10 9 15 8 ...
##  $ t22_problem_reasoning           : int  34 21 18 22 19 16 19 22 18 25 ...
##  $ t23_series_completion           : int  5 1 7 6 4 10 3 18 17 8 ...
##  $ t24_woody_mccall                : int  24 12 20 19 20 22 15 24 18 16 ...
##  $ t25_paper_form_board_r          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ t26_flags                       : int  NA NA NA NA NA NA NA NA NA NA ...

Mô tả dữ liệu:

Dữ liệu của Holzinger và Swineford (1939) gồm điểm các bài test (mental ability tests) học sinh lớp 7-8 từ hai trường Pasteur và Grant-White. id: Identifier sex:Gender ageyr: Age, year part agemo: Age, month part schoolSchool: (Pasteur or Grant-White) grade: Grade x1: Visual perception (= t1_visual_perception/6) (scores on visual perception test, test 1)

range(d1$x1)

## [1] 0.6666667 8.5000000

range(HS$t1_visual_perception)

## [1]  4 51

HS$t1_visual_perception[1]/d1$x1[1]

## [1] 6

x2: Cubes (t2_cubes/4) (scores on cubes test, test 2)

range(d1$x2)

## [1] 2.25 9.25

range(HS$t2_cubes)

## [1]  9 37

HS$t2_cubes[1]/d1$x2[1]

## [1] 4

x3: Lozenges (t4_lozenges/8) (scores on lozenges test, test 4)

range(d1$x3)

## [1] 0.25 4.50

range(HS$t4_lozenges)

## [1]  2 36

HS$t4_lozenges[1]/d1$x3[1]

## [1] 8

x4: Paragraph comprehension (t6_paragraph_comprehension/3)(scores on paragraph comprehension test, test 6)

range(d1$x4)

## [1] 0.000000 6.333333

range(HS$t6_paragraph_comprehension)

## [1]  0 19

HS$t6_paragraph_comprehension[1]/d1$x4[1]

## [1] 3

x5: Sentence completion (t7_sentence/4)(scores on sentence completion test, test 7)

range(d1$x5)

## [1] 1 7

range(HS$t7_sentence)

## [1]  4 28

HS$t7_sentence[1]/d1$x5[1]

## [1] 4

x6: Word meaning (t9_word_meaning/7)(scores on word meaning test, test 9)

range(d1$x6)

## [1] 0.1428571 6.1428571

range(HS$t9_word_meaning)

## [1]  1 43

HS$t9_word_meaning[1]/d1$x6[1]

## [1] 7

x7: Speeded addition (t10_addition/23)(scores on add test, test 10)

range(d1$x7)

## [1] 1.304348 7.434783

range(HS$t10_addition)

## [1]  30 171

HS$t10_addition[1]/d1$x7[1]

## [1] 23

x8: Speeded counting of dots (t12_counting_groups_of_dots/20)(scores on counting groups of dots test, test 12)

range(d1$x8)

## [1]  3.05 10.00

range(HS$t12_counting_groups_of_dots)

## [1]  61 200

HS$t12_counting_groups_of_dots[1]/d1$x8[1]

## [1] 20

x9: Speeded discrimination straight and curved capitals (t13_straight_and_curved_capitals/36)(scores on straight and curved capitals test, test 13)

range(d1$x9)

## [1] 2.777778 9.250000

range(HS$t13_straight_and_curved_capitals)

## [1] 100 333

HS$t13_straight_and_curved_capitals[1]/d1$x9[1]

## [1] 36

2. A first example: confirmatory factor analysis (CFA)

Hình thành các biến ẩn
visual (trực quan) =~ được đo bởi x1 (visual perception: thị giác) + x2 (cubes: hình khối) + x3 (lozenges: hình thoi)

textual (văn bản)=~ được đo bởi x4 (Paragraph comprehension: đọc hiểu đoạn văn) + x5 (Sentence completion: hoàn thành câu) + x6 (Word meaning: hiểu nghĩa từ)

speed (tốc độ) =~ được đo bởi x7 (Speeded addition: cộng nhanh) + x8 (Speeded counting of dots: đếm nhanh số chấm) + x9 (Speeded discrimination straight and curved capitals: phân biệt nhanh chữ viết hoa thẳng hoặc cong)

HS.model <- ' visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ x7 + x8 + x9 '
fit <- cfa(HS.model, data=HolzingerSwineford1939)
fit

## lavaan 0.6-8 ended normally after 35 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        21
##                                                       
##   Number of observations                           301
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                85.306
##   Degrees of freedom                                24
##   P-value (Chi-square)                           0.000

#str(fit)

xem chi tiết nhưng chưa có các chỉ số đo độ phù hợp

summary(fit)

## lavaan 0.6-8 ended normally after 35 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        21
##                                                       
##   Number of observations                           301
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                85.306
##   Degrees of freedom                                24
##   P-value (Chi-square)                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual =~                                           
##     x1                1.000                           
##     x2                0.554    0.100    5.554    0.000
##     x3                0.729    0.109    6.685    0.000
##   textual =~                                          
##     x4                1.000                           
##     x5                1.113    0.065   17.014    0.000
##     x6                0.926    0.055   16.703    0.000
##   speed =~                                            
##     x7                1.000                           
##     x8                1.180    0.165    7.152    0.000
##     x9                1.082    0.151    7.155    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual ~~                                           
##     textual           0.408    0.074    5.552    0.000
##     speed             0.262    0.056    4.660    0.000
##   textual ~~                                          
##     speed             0.173    0.049    3.518    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .x1                0.549    0.114    4.833    0.000
##    .x2                1.134    0.102   11.146    0.000
##    .x3                0.844    0.091    9.317    0.000
##    .x4                0.371    0.048    7.779    0.000
##    .x5                0.446    0.058    7.642    0.000
##    .x6                0.356    0.043    8.277    0.000
##    .x7                0.799    0.081    9.823    0.000
##    .x8                0.488    0.074    6.573    0.000
##    .x9                0.566    0.071    8.003    0.000
##     visual            0.809    0.145    5.564    0.000
##     textual           0.979    0.112    8.737    0.000
##     speed             0.384    0.086    4.451    0.000

Có các chỉ số fit của mô hình

summary(fit, fit.measures=TRUE)

## lavaan 0.6-8 ended normally after 35 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        21
##                                                       
##   Number of observations                           301
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                85.306
##   Degrees of freedom                                24
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               918.852
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.931
##   Tucker-Lewis Index (TLI)                       0.896
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3737.745
##   Loglikelihood unrestricted model (H1)      -3695.092
##                                                       
##   Akaike (AIC)                                7517.490
##   Bayesian (BIC)                              7595.339
##   Sample-size adjusted Bayesian (BIC)         7528.739
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.092
##   90 Percent confidence interval - lower         0.071
##   90 Percent confidence interval - upper         0.114
##   P-value RMSEA <= 0.05                          0.001
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.065
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual =~                                           
##     x1                1.000                           
##     x2                0.554    0.100    5.554    0.000
##     x3                0.729    0.109    6.685    0.000
##   textual =~                                          
##     x4                1.000                           
##     x5                1.113    0.065   17.014    0.000
##     x6                0.926    0.055   16.703    0.000
##   speed =~                                            
##     x7                1.000                           
##     x8                1.180    0.165    7.152    0.000
##     x9                1.082    0.151    7.155    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual ~~                                           
##     textual           0.408    0.074    5.552    0.000
##     speed             0.262    0.056    4.660    0.000
##   textual ~~                                          
##     speed             0.173    0.049    3.518    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .x1                0.549    0.114    4.833    0.000
##    .x2                1.134    0.102   11.146    0.000
##    .x3                0.844    0.091    9.317    0.000
##    .x4                0.371    0.048    7.779    0.000
##    .x5                0.446    0.058    7.642    0.000
##    .x6                0.356    0.043    8.277    0.000
##    .x7                0.799    0.081    9.823    0.000
##    .x8                0.488    0.074    6.573    0.000
##    .x9                0.566    0.071    8.003    0.000
##     visual            0.809    0.145    5.564    0.000
##     textual           0.979    0.112    8.737    0.000
##     speed             0.384    0.086    4.451    0.000

#show(fit)

fitMeasures(fit, c("chisq", "df", "pvalue", "gfi","cfi","tli", "rmsea"))

##  chisq     df pvalue    gfi    cfi    tli  rmsea 
## 85.306 24.000  0.000  0.943  0.931  0.896  0.092

Paths diagram

Lựa chọn tham số std hoặc est thể hiện các giá trị khác nhau trên các đường nối. Chi tiết type lệnh ?semPaths để biết thêm về cá lựa chọn

semPaths(fit, "std", edge.label.cex = 1, curvePivot = TRUE)

Quay hình với các giá trị lựa chọn của tham số rotation, rotation có thể lựa chọn bằng 1, 2, 3 hoặc 4 tùy theo ý người vẽ.

semPaths(fit, "std", edge.label.cex = 1, curvePivot = TRUE, 
         rotation = 2, intercepts = FALSE)

Modification Indices (chỉ số sửa đổi)

Có thể thực hiện bởi việc điều chỉnh tham số modindices = TRUE trong summary() hoặc làm trực tiếp bằng cách gọi hàm modindices() expected parameter change (EPC) values (column epc). 3 cột cuối chứa các giá trị EPC đã được chuẩn hóa (sepc.lv: chỉ chuẩ hóa biến ẩn; sepc.all: chuẩn hóa tất cả các biến; sepc.nox: chuẩn hóa tất cả các biến trừ biến quan sát ngoại sinh)

fit <- cfa(HS.model,
data = HolzingerSwineford1939)
modindices(fit, sort = TRUE, maximum.number = 5)

##        lhs op rhs     mi    epc sepc.lv sepc.all sepc.nox
## 30  visual =~  x9 36.411  0.577   0.519    0.515    0.515
## 76      x7 ~~  x8 34.145  0.536   0.536    0.859    0.859
## 28  visual =~  x7 18.631 -0.422  -0.380   -0.349   -0.349
## 78      x8 ~~  x9 14.946 -0.423  -0.423   -0.805   -0.805
## 33 textual =~  x3  9.151 -0.272  -0.269   -0.238   -0.238

Ta có thể lưu kết quả chạy ra một biến riêng của mình để chủ động thực hiện sắp xếp, lọc, trích xuất ra những thông tin theo ý mình.

Ví dụ ta muốn xem riêng factor loading; hoặc các phương sai và hiệp phương sai

fit <- cfa(HS.model,
data = HolzingerSwineford1939)
mi <- modindices(fit)
mi[mi$op == "=~",]

##        lhs op rhs     mi    epc sepc.lv sepc.all sepc.nox
## 25  visual =~  x4  1.211  0.077   0.069    0.059    0.059
## 26  visual =~  x5  7.441 -0.210  -0.189   -0.147   -0.147
## 27  visual =~  x6  2.843  0.111   0.100    0.092    0.092
## 28  visual =~  x7 18.631 -0.422  -0.380   -0.349   -0.349
## 29  visual =~  x8  4.295 -0.210  -0.189   -0.187   -0.187
## 30  visual =~  x9 36.411  0.577   0.519    0.515    0.515
## 31 textual =~  x1  8.903  0.350   0.347    0.297    0.297
## 32 textual =~  x2  0.017 -0.011  -0.011   -0.010   -0.010
## 33 textual =~  x3  9.151 -0.272  -0.269   -0.238   -0.238
## 34 textual =~  x7  0.098 -0.021  -0.021   -0.019   -0.019
## 35 textual =~  x8  3.359 -0.121  -0.120   -0.118   -0.118
## 36 textual =~  x9  4.796  0.138   0.137    0.136    0.136
## 37   speed =~  x1  0.014  0.024   0.015    0.013    0.013
## 38   speed =~  x2  1.580 -0.198  -0.123   -0.105   -0.105
## 39   speed =~  x3  0.716  0.136   0.084    0.075    0.075
## 40   speed =~  x4  0.003 -0.005  -0.003   -0.003   -0.003
## 41   speed =~  x5  0.201 -0.044  -0.027   -0.021   -0.021
## 42   speed =~  x6  0.273  0.044   0.027    0.025    0.025

mi[mi$op == "~~",]

##    lhs op rhs     mi    epc sepc.lv sepc.all sepc.nox
## 43  x1 ~~  x2  3.606 -0.184  -0.184   -0.233   -0.233
## 44  x1 ~~  x3  0.935 -0.139  -0.139   -0.203   -0.203
## 45  x1 ~~  x4  3.554  0.078   0.078    0.173    0.173
## 46  x1 ~~  x5  0.522 -0.033  -0.033   -0.067   -0.067
## 47  x1 ~~  x6  0.048  0.009   0.009    0.020    0.020
## 48  x1 ~~  x7  5.420 -0.129  -0.129   -0.195   -0.195
## 49  x1 ~~  x8  0.634 -0.041  -0.041   -0.079   -0.079
## 50  x1 ~~  x9  7.335  0.138   0.138    0.247    0.247
## 51  x2 ~~  x3  8.532  0.218   0.218    0.223    0.223
## 52  x2 ~~  x4  0.534 -0.034  -0.034   -0.052   -0.052
## 53  x2 ~~  x5  0.023 -0.008  -0.008   -0.011   -0.011
## 54  x2 ~~  x6  0.785  0.039   0.039    0.062    0.062
## 55  x2 ~~  x7  8.918 -0.183  -0.183   -0.192   -0.192
## 56  x2 ~~  x8  0.054 -0.012  -0.012   -0.017   -0.017
## 57  x2 ~~  x9  1.895  0.075   0.075    0.094    0.094
## 58  x3 ~~  x4  0.142 -0.016  -0.016   -0.028   -0.028
## 59  x3 ~~  x5  7.858 -0.130  -0.130   -0.212   -0.212
## 60  x3 ~~  x6  1.855  0.055   0.055    0.100    0.100
## 61  x3 ~~  x7  0.638 -0.044  -0.044   -0.054   -0.054
## 62  x3 ~~  x8  0.059 -0.012  -0.012   -0.019   -0.019
## 63  x3 ~~  x9  4.126  0.102   0.102    0.147    0.147
## 64  x4 ~~  x5  2.534  0.186   0.186    0.457    0.457
## 65  x4 ~~  x6  6.220 -0.235  -0.235   -0.646   -0.646
## 66  x4 ~~  x7  5.920  0.098   0.098    0.180    0.180
## 67  x4 ~~  x8  3.805 -0.069  -0.069   -0.162   -0.162
## 68  x4 ~~  x9  0.196 -0.016  -0.016   -0.035   -0.035
## 69  x5 ~~  x6  0.916  0.101   0.101    0.253    0.253
## 70  x5 ~~  x7  1.233 -0.049  -0.049   -0.083   -0.083
## 71  x5 ~~  x8  0.347  0.023   0.023    0.049    0.049
## 72  x5 ~~  x9  0.999  0.040   0.040    0.079    0.079
## 73  x6 ~~  x7  0.259 -0.020  -0.020   -0.037   -0.037
## 74  x6 ~~  x8  0.275  0.018   0.018    0.043    0.043
## 75  x6 ~~  x9  0.097 -0.011  -0.011   -0.024   -0.024
## 76  x7 ~~  x8 34.145  0.536   0.536    0.859    0.859
## 77  x7 ~~  x9  5.183 -0.187  -0.187   -0.278   -0.278
## 78  x8 ~~  x9 14.946 -0.423  -0.423   -0.805   -0.805

Trích xuất thông tin từ mô hình ước lượng

NA ứng với các tham số đã được cố định giá trị

fit <- cfa(HS.model, data=HolzingerSwineford1939)
parameterEstimates(fit)

##        lhs op     rhs   est    se      z pvalue ci.lower ci.upper
## 1   visual =~      x1 1.000 0.000     NA     NA    1.000    1.000
## 2   visual =~      x2 0.554 0.100  5.554      0    0.358    0.749
## 3   visual =~      x3 0.729 0.109  6.685      0    0.516    0.943
## 4  textual =~      x4 1.000 0.000     NA     NA    1.000    1.000
## 5  textual =~      x5 1.113 0.065 17.014      0    0.985    1.241
## 6  textual =~      x6 0.926 0.055 16.703      0    0.817    1.035
## 7    speed =~      x7 1.000 0.000     NA     NA    1.000    1.000
## 8    speed =~      x8 1.180 0.165  7.152      0    0.857    1.503
## 9    speed =~      x9 1.082 0.151  7.155      0    0.785    1.378
## 10      x1 ~~      x1 0.549 0.114  4.833      0    0.326    0.772
## 11      x2 ~~      x2 1.134 0.102 11.146      0    0.934    1.333
## 12      x3 ~~      x3 0.844 0.091  9.317      0    0.667    1.022
## 13      x4 ~~      x4 0.371 0.048  7.779      0    0.278    0.465
## 14      x5 ~~      x5 0.446 0.058  7.642      0    0.332    0.561
## 15      x6 ~~      x6 0.356 0.043  8.277      0    0.272    0.441
## 16      x7 ~~      x7 0.799 0.081  9.823      0    0.640    0.959
## 17      x8 ~~      x8 0.488 0.074  6.573      0    0.342    0.633
## 18      x9 ~~      x9 0.566 0.071  8.003      0    0.427    0.705
## 19  visual ~~  visual 0.809 0.145  5.564      0    0.524    1.094
## 20 textual ~~ textual 0.979 0.112  8.737      0    0.760    1.199
## 21   speed ~~   speed 0.384 0.086  4.451      0    0.215    0.553
## 22  visual ~~ textual 0.408 0.074  5.552      0    0.264    0.552
## 23  visual ~~   speed 0.262 0.056  4.660      0    0.152    0.373
## 24 textual ~~   speed 0.173 0.049  3.518      0    0.077    0.270

lhs (vế trái), op (toán tử), rhs (vế phải); Các cột est(ước lượng điểm),se(sai số chuẩn), z (z-value) và pvalue (p-value) cho các tham số ci.lower, ci.upper chặn dưới, chặn trên của khoảng ước lượng 95% cho giá trị cần ước lượng

ước lượng tham số đã được chuẩn hóa

standardizedSolution(fit)

##        lhs op     rhs est.std    se      z pvalue ci.lower ci.upper
## 1   visual =~      x1   0.772 0.055 14.041      0    0.664    0.880
## 2   visual =~      x2   0.424 0.060  7.105      0    0.307    0.540
## 3   visual =~      x3   0.581 0.055 10.539      0    0.473    0.689
## 4  textual =~      x4   0.852 0.023 37.776      0    0.807    0.896
## 5  textual =~      x5   0.855 0.022 38.273      0    0.811    0.899
## 6  textual =~      x6   0.838 0.023 35.881      0    0.792    0.884
## 7    speed =~      x7   0.570 0.053 10.714      0    0.465    0.674
## 8    speed =~      x8   0.723 0.051 14.309      0    0.624    0.822
## 9    speed =~      x9   0.665 0.051 13.015      0    0.565    0.765
## 10      x1 ~~      x1   0.404 0.085  4.763      0    0.238    0.571
## 11      x2 ~~      x2   0.821 0.051 16.246      0    0.722    0.920
## 12      x3 ~~      x3   0.662 0.064 10.334      0    0.537    0.788
## 13      x4 ~~      x4   0.275 0.038  7.157      0    0.200    0.350
## 14      x5 ~~      x5   0.269 0.038  7.037      0    0.194    0.344
## 15      x6 ~~      x6   0.298 0.039  7.606      0    0.221    0.374
## 16      x7 ~~      x7   0.676 0.061 11.160      0    0.557    0.794
## 17      x8 ~~      x8   0.477 0.073  6.531      0    0.334    0.620
## 18      x9 ~~      x9   0.558 0.068  8.208      0    0.425    0.691
## 19  visual ~~  visual   1.000 0.000     NA     NA    1.000    1.000
## 20 textual ~~ textual   1.000 0.000     NA     NA    1.000    1.000
## 21   speed ~~   speed   1.000 0.000     NA     NA    1.000    1.000
## 22  visual ~~ textual   0.459 0.064  7.189      0    0.334    0.584
## 23  visual ~~   speed   0.471 0.073  6.461      0    0.328    0.613
## 24 textual ~~   speed   0.283 0.069  4.117      0    0.148    0.418

Phương sai và hiệp phương sai

fit <- cfa(HS.model, data = HolzingerSwineford1939)
fitted(fit)

## $cov
##    x1    x2    x3    x4    x5    x6    x7    x8    x9   
## x1 1.358                                                
## x2 0.448 1.382                                          
## x3 0.590 0.327 1.275                                    
## x4 0.408 0.226 0.298 1.351                              
## x5 0.454 0.252 0.331 1.090 1.660                        
## x6 0.378 0.209 0.276 0.907 1.010 1.196                  
## x7 0.262 0.145 0.191 0.173 0.193 0.161 1.183            
## x8 0.309 0.171 0.226 0.205 0.228 0.190 0.453 1.022      
## x9 0.284 0.157 0.207 0.188 0.209 0.174 0.415 0.490 1.015

#fitted.values(fit)

Các phần dư chưa được chuẩn hóa

the difference between the observed and implied covariance matrix and mean vector

fit <- cfa(HS.model, data = HolzingerSwineford1939)
resid(fit)

## $type
## [1] "raw"
## 
## $cov
##    x1     x2     x3     x4     x5     x6     x7     x8     x9    
## x1  0.000                                                        
## x2 -0.041  0.000                                                 
## x3 -0.010  0.124  0.000                                          
## x4  0.097 -0.017 -0.090  0.000                                   
## x5 -0.014 -0.040 -0.219  0.008  0.000                            
## x6  0.077  0.038 -0.032 -0.012  0.005  0.000                     
## x7 -0.177 -0.242 -0.103  0.046 -0.050 -0.017  0.000              
## x8 -0.046 -0.062 -0.013 -0.079 -0.047 -0.024  0.082  0.000       
## x9  0.175  0.087  0.167  0.056  0.086  0.062 -0.042 -0.032  0.000

#residuals(fit)

Thông tin về độ phù hợp

fit <- cfa(HS.model, data=HolzingerSwineford1939)
fitMeasures(fit)

##                npar                fmin               chisq                  df 
##              21.000               0.142              85.306              24.000 
##              pvalue      baseline.chisq         baseline.df     baseline.pvalue 
##               0.000             918.852              36.000               0.000 
##                 cfi                 tli                nnfi                 rfi 
##               0.931               0.896               0.896               0.861 
##                 nfi                pnfi                 ifi                 rni 
##               0.907               0.605               0.931               0.931 
##                logl   unrestricted.logl                 aic                 bic 
##           -3737.745           -3695.092            7517.490            7595.339 
##              ntotal                bic2               rmsea      rmsea.ci.lower 
##             301.000            7528.739               0.092               0.071 
##      rmsea.ci.upper        rmsea.pvalue                 rmr          rmr_nomean 
##               0.114               0.001               0.082               0.082 
##                srmr        srmr_bentler srmr_bentler_nomean                crmr 
##               0.065               0.065               0.065               0.073 
##         crmr_nomean          srmr_mplus   srmr_mplus_nomean               cn_05 
##               0.073               0.065               0.065             129.490 
##               cn_01                 gfi                agfi                pgfi 
##             152.654               0.943               0.894               0.503 
##                 mfi                ecvi 
##               0.903               0.423

có thể gọi riêng từng chỉ số để tiện quan sát

fit <- cfa(HS.model, data=HolzingerSwineford1939)
fitMeasures(fit, "cfi")

##   cfi 
## 0.931

hoặc

fitMeasures(fit, c("cfi","rmsea","srmr"))

##   cfi rmsea  srmr 
## 0.931 0.092 0.065

nhìn rõ cấu các tham số trong mô hình cần ước lượng, những tham số khác 0 tức là để tự do và cần ước lượng

fit <- cfa(HS.model, data=HolzingerSwineford1939)
lavInspect(fit)

## $lambda
##    visual textul speed
## x1      0      0     0
## x2      1      0     0
## x3      2      0     0
## x4      0      0     0
## x5      0      3     0
## x6      0      4     0
## x7      0      0     0
## x8      0      0     5
## x9      0      0     6
## 
## $theta
##    x1 x2 x3 x4 x5 x6 x7 x8 x9
## x1  7                        
## x2  0  8                     
## x3  0  0  9                  
## x4  0  0  0 10               
## x5  0  0  0  0 11            
## x6  0  0  0  0  0 12         
## x7  0  0  0  0  0  0 13      
## x8  0  0  0  0  0  0  0 14   
## x9  0  0  0  0  0  0  0  0 15
## 
## $psi
##         visual textul speed
## visual  16                 
## textual 19     17          
## speed   20     21     18

Các giá trị khởi tạo

lavInspect(fit, what = "start")

## $lambda
##    visual textul speed
## x1  1.000  0.000 0.000
## x2  0.778  0.000 0.000
## x3  1.107  0.000 0.000
## x4  0.000  1.000 0.000
## x5  0.000  1.133 0.000
## x6  0.000  0.924 0.000
## x7  0.000  0.000 1.000
## x8  0.000  0.000 1.225
## x9  0.000  0.000 0.854
## 
## $theta
##    x1    x2    x3    x4    x5    x6    x7    x8    x9   
## x1 0.679                                                
## x2 0.000 0.691                                          
## x3 0.000 0.000 0.637                                    
## x4 0.000 0.000 0.000 0.675                              
## x5 0.000 0.000 0.000 0.000 0.830                        
## x6 0.000 0.000 0.000 0.000 0.000 0.598                  
## x7 0.000 0.000 0.000 0.000 0.000 0.000 0.592            
## x8 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.511      
## x9 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.508
## 
## $psi
##         visual textul speed
## visual  0.05               
## textual 0.00   0.05        
## speed   0.00   0.00   0.05

Tìm hiểu thêm

#?lavInspect

’ visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 ’

More about the syntax

Fixing parameters: cố định tham số f =~ y1 + 1y2 + 1y3 + 1y4 (cố định các factor loading=1) Mặc định trong CFA các biến ẩn ngoại sinh có tương quan với nhau, vì vậy nếu ta muốn áp đặt sự không tương quan thì cần can thiệp Kịch bản 1: # three-factor model visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ NAx7 + x8 + x9 (loading factor của x7 cho tự do, ko bị áp đặt bằng 1 như của x1 và x4) # orthogonal factors visual ~~ 0speed (visual và speed ko tương quan) textual ~~ 0speed (textual và speed ko tương quan) # fix variance of speed factor speed ~~ 1*speed (áp đặt phương sai của speed = 1)

HS.model.1 <- ' visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ NA*x7 + x8 + x9 
visual ~~ 0*speed
textual ~~ 0*speed
speed ~~ 1*speed
'
fit <- cfa(HS.model.1, data=HolzingerSwineford1939)
summary(fit, fit.measures=TRUE)

## lavaan 0.6-8 ended normally after 30 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        19
##                                                       
##   Number of observations                           301
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                               117.946
##   Degrees of freedom                                26
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               918.852
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.896
##   Tucker-Lewis Index (TLI)                       0.856
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3754.065
##   Loglikelihood unrestricted model (H1)      -3695.092
##                                                       
##   Akaike (AIC)                                7546.131
##   Bayesian (BIC)                              7616.566
##   Sample-size adjusted Bayesian (BIC)         7556.308
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.108
##   90 Percent confidence interval - lower         0.089
##   90 Percent confidence interval - upper         0.129
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.125
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual =~                                           
##     x1                1.000                           
##     x2                0.559    0.105    5.300    0.000
##     x3                0.708    0.118    6.004    0.000
##   textual =~                                          
##     x4                1.000                           
##     x5                1.111    0.065   16.996    0.000
##     x6                0.925    0.055   16.703    0.000
##   speed =~                                            
##     x7                0.661    0.073    9.040    0.000
##     x8                0.810    0.074   10.899    0.000
##     x9                0.565    0.066    8.509    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual ~~                                           
##     speed             0.000                           
##   textual ~~                                          
##     speed             0.000                           
##   visual ~~                                           
##     textual           0.414    0.074    5.562    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     speed             1.000                           
##    .x1                0.536    0.129    4.155    0.000
##    .x2                1.125    0.103   10.965    0.000
##    .x3                0.863    0.095    9.085    0.000
##    .x4                0.369    0.048    7.735    0.000
##    .x5                0.449    0.059    7.662    0.000
##    .x6                0.356    0.043    8.263    0.000
##    .x7                0.746    0.086    8.650    0.000
##    .x8                0.366    0.097    3.794    0.000
##    .x9                0.696    0.072    9.640    0.000
##     visual            0.822    0.158    5.188    0.000
##     textual           0.981    0.112    8.745    0.000

Kịch bản 2 tất cả các biến ẩn không tương quan, ta chọn orthogonal = TRUE

HS.model.ortho <- ' visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ x7 + x8 + x9 '
fit <- cfa(HS.model.ortho, data=HolzingerSwineford1939,orthogonal = TRUE)
summary(fit, fit.measures=TRUE)

## lavaan 0.6-8 ended normally after 32 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        18
##                                                       
##   Number of observations                           301
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                               153.527
##   Degrees of freedom                                27
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               918.852
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.857
##   Tucker-Lewis Index (TLI)                       0.809
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3771.856
##   Loglikelihood unrestricted model (H1)      -3695.092
##                                                       
##   Akaike (AIC)                                7579.711
##   Bayesian (BIC)                              7646.439
##   Sample-size adjusted Bayesian (BIC)         7589.354
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.125
##   90 Percent confidence interval - lower         0.106
##   90 Percent confidence interval - upper         0.144
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.161
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual =~                                           
##     x1                1.000                           
##     x2                0.778    0.141    5.532    0.000
##     x3                1.107    0.214    5.173    0.000
##   textual =~                                          
##     x4                1.000                           
##     x5                1.133    0.067   16.906    0.000
##     x6                0.924    0.056   16.391    0.000
##   speed =~                                            
##     x7                1.000                           
##     x8                1.225    0.190    6.460    0.000
##     x9                0.854    0.121    7.046    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual ~~                                           
##     textual           0.000                           
##     speed             0.000                           
##   textual ~~                                          
##     speed             0.000                           
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .x1                0.835    0.118    7.064    0.000
##    .x2                1.065    0.105   10.177    0.000
##    .x3                0.633    0.129    4.899    0.000
##    .x4                0.382    0.049    7.805    0.000
##    .x5                0.416    0.059    7.038    0.000
##    .x6                0.369    0.044    8.367    0.000
##    .x7                0.746    0.086    8.650    0.000
##    .x8                0.366    0.097    3.794    0.000
##    .x9                0.696    0.072    9.640    0.000
##     visual            0.524    0.130    4.021    0.000
##     textual           0.969    0.112    8.640    0.000
##     speed             0.437    0.097    4.520    0.000

Kịch bản 3 tất cả các factor loading của các biến x1, x4, x7 đều ko bắt buộc phải = 1, ta chọn std.lv = TRUE (khi đó các phương sai của biến ẩn buộc phải =1)

HS.model <- ' visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ x7 + x8 + x9 '
fit <- cfa(HS.model, data=HolzingerSwineford1939,std.lv = TRUE)
summary(fit, fit.measures=TRUE)

## lavaan 0.6-8 ended normally after 20 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        21
##                                                       
##   Number of observations                           301
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                85.306
##   Degrees of freedom                                24
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               918.852
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.931
##   Tucker-Lewis Index (TLI)                       0.896
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3737.745
##   Loglikelihood unrestricted model (H1)      -3695.092
##                                                       
##   Akaike (AIC)                                7517.490
##   Bayesian (BIC)                              7595.339
##   Sample-size adjusted Bayesian (BIC)         7528.739
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.092
##   90 Percent confidence interval - lower         0.071
##   90 Percent confidence interval - upper         0.114
##   P-value RMSEA <= 0.05                          0.001
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.065
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual =~                                           
##     x1                0.900    0.081   11.128    0.000
##     x2                0.498    0.077    6.429    0.000
##     x3                0.656    0.074    8.817    0.000
##   textual =~                                          
##     x4                0.990    0.057   17.474    0.000
##     x5                1.102    0.063   17.576    0.000
##     x6                0.917    0.054   17.082    0.000
##   speed =~                                            
##     x7                0.619    0.070    8.903    0.000
##     x8                0.731    0.066   11.090    0.000
##     x9                0.670    0.065   10.305    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual ~~                                           
##     textual           0.459    0.064    7.189    0.000
##     speed             0.471    0.073    6.461    0.000
##   textual ~~                                          
##     speed             0.283    0.069    4.117    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .x1                0.549    0.114    4.833    0.000
##    .x2                1.134    0.102   11.146    0.000
##    .x3                0.844    0.091    9.317    0.000
##    .x4                0.371    0.048    7.779    0.000
##    .x5                0.446    0.058    7.642    0.000
##    .x6                0.356    0.043    8.277    0.000
##    .x7                0.799    0.081    9.823    0.000
##    .x8                0.488    0.074    6.573    0.000
##    .x9                0.566    0.071    8.003    0.000
##     visual            1.000                           
##     textual           1.000                           
##     speed             1.000

Bringing in the means (đưa hệ số chặn vào mô hình) x1 ~ 1 x2 ~ 1 x3 ~ 1 Ta có thể chỉ định từng biến quan sát có hệ số chặn hoặc cho hệ số chặn bổ sung cho tất cả bằng tham số meanstructure = TRUE

HS.model <- ' visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ x7 + x8 + x9 
x1 + x2 + x3 + x4 ~ 0.5*1 #(áp đặt hệ số chặn của x1,x2,x3,x4 bằng 0.5)
x5~0.6*1 # (áp đặt hệ số chặn của x5 bằng 0.6)
'
fit <- cfa(HS.model,
data = HolzingerSwineford1939,
meanstructure = TRUE)
summary(fit,fit.measures=TRUE)

## lavaan 0.6-8 ended normally after 95 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        25
##                                                       
##   Number of observations                           301
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                              1154.526
##   Degrees of freedom                                29
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               918.852
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.000
##   Tucker-Lewis Index (TLI)                      -0.583
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -4272.355
##   Loglikelihood unrestricted model (H1)      -3695.092
##                                                       
##   Akaike (AIC)                                8594.711
##   Bayesian (BIC)                              8687.388
##   Sample-size adjusted Bayesian (BIC)         8608.103
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.359
##   90 Percent confidence interval - lower         0.342
##   90 Percent confidence interval - upper         0.377
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           6.830
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual =~                                           
##     x1                1.000                           
##     x2                1.229    0.020   60.605    0.000
##     x3                0.405    0.013   30.661    0.000
##   textual =~                                          
##     x4                1.000                           
##     x5                1.428    0.024   58.425    0.000
##     x6                1.057    0.020   53.968    0.000
##   speed =~                                            
##     x7                1.000                           
##     x8                1.187    0.058   20.438    0.000
##     x9                1.053    0.055   19.080    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   visual ~~                                           
##     textual          11.914    1.011   11.783    0.000
##     speed             5.438    0.521   10.443    0.000
##   textual ~~                                          
##     speed             3.185    0.311   10.231    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .x1                0.500                           
##    .x2                0.500                           
##    .x3                0.500                           
##    .x4                0.500                           
##    .x5                0.600                           
##    .x6               -0.554    0.040  -13.833    0.000
##    .x7                3.017    0.061   49.372    0.000
##    .x8                4.139    0.056   74.133    0.000
##    .x9                4.143    0.056   73.769    0.000
##     visual            0.000                           
##     textual           0.000                           
##     speed             0.000                           
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .x1                0.508    0.098    5.166    0.000
##    .x2                1.629    0.192    8.504    0.000
##    .x3                0.972    0.083   11.691    0.000
##    .x4                0.438    0.045    9.726    0.000
##    .x5                0.413    0.061    6.757    0.000
##    .x6                0.355    0.041    8.660    0.000
##    .x7                0.793    0.076   10.408    0.000
##    .x8                0.472    0.061    7.743    0.000
##    .x9                0.582    0.062    9.401    0.000
##     visual           20.526    1.716   11.959    0.000
##     textual           7.471    0.644   11.596    0.000
##     speed             1.757    0.202    8.715    0.000

So sánh các mô hình

HS.model <- ' visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ x7 + x8 + x9 
'
fit1 <- cfa(HS.model, data=HolzingerSwineford1939)

HS.model.1 <- ' visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ NA*x7 + x8 + x9 
visual ~~ 0*speed
textual ~~ 0*speed
speed ~~ 1*speed
'
fit2 <- cfa(HS.model.1, data=HolzingerSwineford1939)

HS.model.ortho <- ' visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ x7 + x8 + x9 '
fit3 <- cfa(HS.model.ortho, data=HolzingerSwineford1939,std.lv = TRUE,orthogonal = TRUE)

# model comparison tests
lavTestLRT(fit1, fit2, fit3)

## Chi-Squared Difference Test
## 
##      Df    AIC    BIC   Chisq Chisq diff Df diff Pr(>Chisq)    
## fit1 24 7517.5 7595.3  85.305                                  
## fit2 26 7546.1 7616.6 117.946     32.641       2  8.169e-08 ***
## fit3 27 7579.7 7646.4 153.527     35.581       1  2.447e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Thực hành phân tích CFA với gói lavaan

Ngô Thị Thanh Nga -TLU

06/2021