1 生成模拟数据

# 自编数据
set.seed(123)
x1 <- rnorm(100, 3.2, 2.2)
x2 <- rnorm(100, 2.9, 3.1)
x3 <- rnorm(100, 4, 5.9)
x4 <- rnorm(100, 3, 16.7)
x5 <- rnorm(100, 5, 110.1)

x6 <- x4 - x5

y <- 97.72 + 5.77*x1 - 1.32 * x2 + 1.14 *  x3 + .27 * x6
y1 <- 7.72 + 3.77*x1 - 1.32 * x2 + 4.14 * x3 + .67 * x6
y2 <- rnorm(100, 12, 34)

mydata <- data.frame(y, x1, x2, x3, x4, y1, y2)

2 多元线性回归

# lm的方法
model.1 <- lm(y ~ x1 + x2 + x3 + x4, data = mydata)
summary(model.1)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4, data = mydata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -73.293 -20.446  -2.218  17.245  73.526 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 78.14450    7.09886  11.008  < 2e-16 ***
## x1           8.80007    1.47267   5.976 3.98e-08 ***
## x2           0.03703    0.97853   0.038   0.9699    
## x3           1.39423    0.52740   2.644   0.0096 ** 
## x4           0.31152    0.16922   1.841   0.0687 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.11 on 95 degrees of freedom
## Multiple R-squared:  0.3036, Adjusted R-squared:  0.2743 
## F-statistic: 10.35 on 4 and 95 DF,  p-value: 5.295e-07
#lavaan的方法
library(lavaan)

myModel <- 'y ~ x1 + x2 + x3 + x4'
fit <- sem(model = myModel, data = mydata)
summary(fit)
## lavaan 0.6-8 ended normally after 31 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
##                                                       
##   Number of observations                           100
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   y ~                                                 
##     x1                8.800    1.435    6.131    0.000
##     x2                0.037    0.954    0.039    0.969
##     x3                1.394    0.514    2.712    0.007
##     x4                0.312    0.165    1.889    0.059
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .y               805.266  113.882    7.071    0.000

可以发现,lavaan所计算出来的标准误和OLS有轻微的不同。在线性回归中,我们知道系数标准误为:\(\sigma^2_y(X^TX)^-\)
lm()命令中,有\(\hat{\sigma}_{y}^{2}=\frac{\sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2}}{n-(p+1)}\),而在lavaan所使用的极大似然方法中,有\(\hat{\sigma}_{y}^{2}=\frac{\sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2}}{n}\),这会导致lm中的标准误更大。

3 多变量回归

myModel.2 <- 'y1 ~ x1 + x2 + x3 + x4
  y2 ~ x1 + x2 + x3 + x4'
fit <- sem(model = myModel.2, data = mydata)
summary(fit)
## lavaan 0.6-8 ended normally after 90 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        11
##                                                       
##   Number of observations                           100
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   y1 ~                                                
##     x1               11.289    3.562    3.169    0.002
##     x2                2.047    2.367    0.865    0.387
##     x3                4.771    1.276    3.740    0.000
##     x4                0.773    0.409    1.889    0.059
##   y2 ~                                                
##     x1               -0.868    1.586   -0.547    0.584
##     x2                1.235    1.054    1.171    0.241
##     x3                0.018    0.568    0.033    0.974
##     x4               -0.179    0.182   -0.982    0.326
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##  .y1 ~~                                               
##    .y2             -482.990  226.013   -2.137    0.033
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .y1             4958.629  701.256    7.071    0.000
##    .y2              983.116  139.034    7.071    0.000

4 路径分析

myModel.3 <- 'y ~ x1 + x3 + x4
  y1 ~ y
  y2 ~ y1'
fit3 <- sem(model = myModel.3, data = mydata)
summary(fit)
## lavaan 0.6-8 ended normally after 90 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        11
##                                                       
##   Number of observations                           100
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   y1 ~                                                
##     x1               11.289    3.562    3.169    0.002
##     x2                2.047    2.367    0.865    0.387
##     x3                4.771    1.276    3.740    0.000
##     x4                0.773    0.409    1.889    0.059
##   y2 ~                                                
##     x1               -0.868    1.586   -0.547    0.584
##     x2                1.235    1.054    1.171    0.241
##     x3                0.018    0.568    0.033    0.974
##     x4               -0.179    0.182   -0.982    0.326
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##  .y1 ~~                                               
##    .y2             -482.990  226.013   -2.137    0.033
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .y1             4958.629  701.256    7.071    0.000
##    .y2              983.116  139.034    7.071    0.000
library(semPlot)
semPaths(fit3, whatLabels = 'std', edge.label.cex = 1, layout = 'tree')

对于外生变量来说,标准化后的方差其实就是\(1-R^2\),以\(y\)为例,就是说有70%的变异性不能由其解释变量解释。

5 中介分析

# 生成数据
x <- rnorm(200, 5, 17)
e1 <- rnorm(200, 0, 10)
e2 <- rnorm(200, 0, 10)
m <- .17 * x + e1
y <- .2 * m + .2 * x + e2

data <- data.frame(x, m, y)

myModel.4 <- "
  y ~ b * m + c * x
  m ~ a * x
  indirect := a * b
  direct := c
  total := c + (a * b)
"

fit4 <- sem(model = myModel.4, data = data, se = 'bootstrap')
summary(fit4)
## lavaan 0.6-8 ended normally after 16 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
##                                                       
##   Number of observations                           200
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             1000
##   Number of successful bootstrap draws            1000
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   y ~                                                 
##     m          (b)    0.111    0.079    1.407    0.160
##     x          (c)    0.220    0.038    5.809    0.000
##   m ~                                                 
##     x          (a)    0.134    0.040    3.355    0.001
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .y               102.739   10.809    9.505    0.000
##    .m               106.223   10.090   10.528    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.015    0.011    1.306    0.192
##     direct            0.220    0.038    5.806    0.000
##     total             0.235    0.038    6.190    0.000
semPaths(fit4, whatLabels = 'est', edge.label.cex = 1)

我们再设置一个更复杂的双路径中介效应:

# 生成数据
x <- rnorm(200, 5, 17)
e1 <- rnorm(200, 0, 10)
e2 <- rnorm(200, 0, 10)
e3 <- rnorm(200, 0, 10)
m1 <- .17 * x + e1
m2 <- .3 * x + e2
y <- .2 * m1 + .2 * x - .1 * m2 + e3

data <- data.frame(x, m1, m2, y)

myModel.5 <- "
  y ~ a * m1 + b * m2 + c * x
  m1 ~ d * x
  m2 ~ e * x
  indirect := d * a + e * b
  direct := c
  total := c + d * a + e * b
"

fit5 <- sem(model = myModel.5, data = data, se = 'bootstrap')
summary(fit5)
## lavaan 0.6-8 ended normally after 20 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         8
##                                                       
##   Number of observations                           200
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 0.113
##   Degrees of freedom                                 1
##   P-value (Chi-square)                           0.737
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             1000
##   Number of successful bootstrap draws            1000
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   y ~                                                 
##     m1         (a)    0.166    0.068    2.437    0.015
##     m2         (b)   -0.013    0.067   -0.191    0.848
##     x          (c)    0.134    0.052    2.602    0.009
##   m1 ~                                                
##     x          (d)    0.131    0.049    2.679    0.007
##   m2 ~                                                
##     x          (e)    0.350    0.047    7.512    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .y               103.886    9.492   10.945    0.000
##    .m1              108.410    9.398   11.535    0.000
##    .m2              112.083   11.393    9.838    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     indirect          0.017    0.027    0.631    0.528
##     direct            0.134    0.052    2.601    0.009
##     total             0.151    0.043    3.544    0.000
semPaths(fit5, whatLabels = 'est', edge.label.cex = 1, layout = 'tree')