# 自编数据
set.seed(123)
x1 <- rnorm(100, 3.2, 2.2)
x2 <- rnorm(100, 2.9, 3.1)
x3 <- rnorm(100, 4, 5.9)
x4 <- rnorm(100, 3, 16.7)
x5 <- rnorm(100, 5, 110.1)
x6 <- x4 - x5
y <- 97.72 + 5.77*x1 - 1.32 * x2 + 1.14 * x3 + .27 * x6
y1 <- 7.72 + 3.77*x1 - 1.32 * x2 + 4.14 * x3 + .67 * x6
y2 <- rnorm(100, 12, 34)
mydata <- data.frame(y, x1, x2, x3, x4, y1, y2)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4, data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -73.293 -20.446 -2.218 17.245 73.526
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 78.14450 7.09886 11.008 < 2e-16 ***
## x1 8.80007 1.47267 5.976 3.98e-08 ***
## x2 0.03703 0.97853 0.038 0.9699
## x3 1.39423 0.52740 2.644 0.0096 **
## x4 0.31152 0.16922 1.841 0.0687 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.11 on 95 degrees of freedom
## Multiple R-squared: 0.3036, Adjusted R-squared: 0.2743
## F-statistic: 10.35 on 4 and 95 DF, p-value: 5.295e-07
#lavaan的方法
library(lavaan)
myModel <- 'y ~ x1 + x2 + x3 + x4'
fit <- sem(model = myModel, data = mydata)
summary(fit)
## lavaan 0.6-8 ended normally after 31 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 100
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## y ~
## x1 8.800 1.435 6.131 0.000
## x2 0.037 0.954 0.039 0.969
## x3 1.394 0.514 2.712 0.007
## x4 0.312 0.165 1.889 0.059
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .y 805.266 113.882 7.071 0.000
可以发现,lavaan所计算出来的标准误和OLS有轻微的不同。在线性回归中,我们知道系数标准误为:\(\sigma^2_y(X^TX)^-\)。
在lm()
命令中,有\(\hat{\sigma}_{y}^{2}=\frac{\sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2}}{n-(p+1)}\),而在lavaan
所使用的极大似然方法中,有\(\hat{\sigma}_{y}^{2}=\frac{\sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2}}{n}\),这会导致lm
中的标准误更大。
myModel.2 <- 'y1 ~ x1 + x2 + x3 + x4
y2 ~ x1 + x2 + x3 + x4'
fit <- sem(model = myModel.2, data = mydata)
summary(fit)
## lavaan 0.6-8 ended normally after 90 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 11
##
## Number of observations 100
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## y1 ~
## x1 11.289 3.562 3.169 0.002
## x2 2.047 2.367 0.865 0.387
## x3 4.771 1.276 3.740 0.000
## x4 0.773 0.409 1.889 0.059
## y2 ~
## x1 -0.868 1.586 -0.547 0.584
## x2 1.235 1.054 1.171 0.241
## x3 0.018 0.568 0.033 0.974
## x4 -0.179 0.182 -0.982 0.326
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## .y1 ~~
## .y2 -482.990 226.013 -2.137 0.033
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .y1 4958.629 701.256 7.071 0.000
## .y2 983.116 139.034 7.071 0.000
myModel.3 <- 'y ~ x1 + x3 + x4
y1 ~ y
y2 ~ y1'
fit3 <- sem(model = myModel.3, data = mydata)
summary(fit)
## lavaan 0.6-8 ended normally after 90 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 11
##
## Number of observations 100
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## y1 ~
## x1 11.289 3.562 3.169 0.002
## x2 2.047 2.367 0.865 0.387
## x3 4.771 1.276 3.740 0.000
## x4 0.773 0.409 1.889 0.059
## y2 ~
## x1 -0.868 1.586 -0.547 0.584
## x2 1.235 1.054 1.171 0.241
## x3 0.018 0.568 0.033 0.974
## x4 -0.179 0.182 -0.982 0.326
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## .y1 ~~
## .y2 -482.990 226.013 -2.137 0.033
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .y1 4958.629 701.256 7.071 0.000
## .y2 983.116 139.034 7.071 0.000
对于外生变量来说,标准化后的方差其实就是\(1-R^2\),以\(y\)为例,就是说有70%的变异性不能由其解释变量解释。
# 生成数据
x <- rnorm(200, 5, 17)
e1 <- rnorm(200, 0, 10)
e2 <- rnorm(200, 0, 10)
m <- .17 * x + e1
y <- .2 * m + .2 * x + e2
data <- data.frame(x, m, y)
myModel.4 <- "
y ~ b * m + c * x
m ~ a * x
indirect := a * b
direct := c
total := c + (a * b)
"
fit4 <- sem(model = myModel.4, data = data, se = 'bootstrap')
summary(fit4)
## lavaan 0.6-8 ended normally after 16 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 200
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Bootstrap
## Number of requested bootstrap draws 1000
## Number of successful bootstrap draws 1000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## y ~
## m (b) 0.111 0.079 1.407 0.160
## x (c) 0.220 0.038 5.809 0.000
## m ~
## x (a) 0.134 0.040 3.355 0.001
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .y 102.739 10.809 9.505 0.000
## .m 106.223 10.090 10.528 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## indirect 0.015 0.011 1.306 0.192
## direct 0.220 0.038 5.806 0.000
## total 0.235 0.038 6.190 0.000
我们再设置一个更复杂的双路径中介效应:
# 生成数据
x <- rnorm(200, 5, 17)
e1 <- rnorm(200, 0, 10)
e2 <- rnorm(200, 0, 10)
e3 <- rnorm(200, 0, 10)
m1 <- .17 * x + e1
m2 <- .3 * x + e2
y <- .2 * m1 + .2 * x - .1 * m2 + e3
data <- data.frame(x, m1, m2, y)
myModel.5 <- "
y ~ a * m1 + b * m2 + c * x
m1 ~ d * x
m2 ~ e * x
indirect := d * a + e * b
direct := c
total := c + d * a + e * b
"
fit5 <- sem(model = myModel.5, data = data, se = 'bootstrap')
summary(fit5)
## lavaan 0.6-8 ended normally after 20 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 8
##
## Number of observations 200
##
## Model Test User Model:
##
## Test statistic 0.113
## Degrees of freedom 1
## P-value (Chi-square) 0.737
##
## Parameter Estimates:
##
## Standard errors Bootstrap
## Number of requested bootstrap draws 1000
## Number of successful bootstrap draws 1000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## y ~
## m1 (a) 0.166 0.068 2.437 0.015
## m2 (b) -0.013 0.067 -0.191 0.848
## x (c) 0.134 0.052 2.602 0.009
## m1 ~
## x (d) 0.131 0.049 2.679 0.007
## m2 ~
## x (e) 0.350 0.047 7.512 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .y 103.886 9.492 10.945 0.000
## .m1 108.410 9.398 11.535 0.000
## .m2 112.083 11.393 9.838 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## indirect 0.017 0.027 0.631 0.528
## direct 0.134 0.052 2.601 0.009
## total 0.151 0.043 3.544 0.000