library(psych)
library(knitr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(lavaan)
## This is lavaan 0.6-3
## lavaan is BETA software! Please report any bugs.
##
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
##
## cor2cov
library(semPlot)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
rm(list = ls())
setwd("/Users/huangzongxian/Desktop/高等量化分析/Moderation")
options(digits = 3,scipen = 999)
mathmod<- read.csv("mathmod.csv")
str(mathmod) #gender=1:女性 #gender=0:男性
## 'data.frame': 101 obs. of 3 variables:
## $ training: num 0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 ...
## $ gender : int 0 1 1 0 1 0 0 1 0 0 ...
## $ math : num 4.5 2.4 3.1 5.8 3.2 4.6 5 1.8 4.6 5.6 ...
math與training的迴歸分析,會發現結果怪怪的,投入教育並不會對於數學成績有幫助summary(lm(math~training,data=mathmod))
##
## Call:
## lm(formula = math ~ training, data = mathmod)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7786 -0.6713 -0.0345 0.6936 3.0016
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.4957 0.2355 14.84 <0.0000000000000002 ***
## training -0.0401 0.0407 -0.99 0.33
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.19 on 99 degrees of freedom
## Multiple R-squared: 0.00973, Adjusted R-squared: -0.00027
## F-statistic: 0.973 on 1 and 99 DF, p-value: 0.326
xz後,加入調節變項再進行一次迴歸分析,星星都跑出來了!mathmod$xz<-mathmod$training*mathmod$gender
summary(lm(math~training+gender+xz,data=mathmod))
##
## Call:
## lm(formula = math ~ training + gender + xz, data = mathmod)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.684 -0.589 -0.106 0.781 2.235
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.9900 0.2750 18.15 < 0.0000000000000002 ***
## training -0.3394 0.0539 -6.30 0.000000008695 ***
## gender -2.7569 0.3791 -7.27 0.000000000091 ***
## xz 0.5043 0.0685 7.37 0.000000000058 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.953 on 97 degrees of freedom
## Multiple R-squared: 0.38, Adjusted R-squared: 0.361
## F-statistic: 19.8 on 3 and 97 DF, p-value: 0.000000000426
{plot(mathmod$training, mathmod$math, type='n') ## create an empty frame
abline(5, -.34) ## for male ##男性迴歸線
abline(2.23, .16, lty=2, col='red') ## for female ##女性迴歸線
legend('topright', c('Male', 'Female'), lty=c(1,2),col=c('black', 'red'))
## add scatter plot
points(mathmod$training[mathmod$gender==0], mathmod$math[mathmod$gender==0])
points(mathmod$training[mathmod$gender==1], mathmod$math[mathmod$gender==1], col='red')}
iris為例scatterplot(Sepal.Length~Sepal.Width|Species, data=iris, smooth=FALSE)
inter加入迴歸當中depress<- read.csv("depress.csv")
str(depress)
## 'data.frame': 100 obs. of 3 variables:
## $ stress : int 7 8 2 7 6 2 4 6 5 5 ...
## $ support: int 5 7 2 6 9 8 2 9 10 10 ...
## $ depress: int 32 20 30 25 19 25 33 16 18 17 ...
depress$inter<-depress$stress*depress$support
summary(lm(depress~stress+support+inter, data=depress))
##
## Call:
## lm(formula = depress ~ stress + support + inter, data = depress)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.732 -0.903 -0.113 0.854 3.609
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.2583 0.6909 42.35 <0.0000000000000002 ***
## stress 1.9956 0.1161 17.19 <0.0000000000000002 ***
## support -0.2356 0.1109 -2.12 0.036 *
## inter -0.3902 0.0188 -20.75 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.39 on 96 degrees of freedom
## Multiple R-squared: 0.964, Adjusted R-squared: 0.963
## F-statistic: 853 on 3 and 96 DF, p-value: <0.0000000000000002
support效果分為高、中、低三個類別接下來我們一樣將他們丟入所求得的迴歸式當中,求得三條不同的迴歸式
此時的迴歸式為:Y=29.26+2.0(stress)+(-0.24)(8.18)+(-0.39)(stress)x8.18 -> Y= 27.3-1.19stress
mean(depress$support)+sd(depress$support) #8.18
## [1] 8.18
mean(depress$support) #5.37
## [1] 5.37
mean(depress$support)-sd(depress$support) #2.56
## [1] 2.56
"rockchalk套件當中的plotSlopes()就可以更簡單的畫出來(還更為美觀){## create an empty frame
plot(depress$stress, depress$depress, type='n', xlab='Stress', ylab='Depression')
## abline(interceptvalue, linearslopevalue)
# for support = mean -1SD ##Low
abline(28.65, 1)
# for support = mean ##Medium
abline(27.97, -.09, col='blue')
# for support = mean +1SD ##High
abline(27.30, -1.19, col='red')
legend('topleft', c('Low', 'Medium', 'High'), lty=c(1,1,1), col=c('black','blue','red'))}
library(rockchalk)
example.1<- lm(depress~stress*support, data=depress)
plotSlopes(example.1, plotx="stress",modx="support", modxVals="std.dev.",interval="confidence", main = "Simple Slopes Plot Example")
家庭環境的影響?nlsy <- read.csv("nlsy.csv")
str(nlsy)
## 'data.frame': 371 obs. of 3 variables:
## $ ME : int 12 15 9 8 9 10 7 11 11 3 ...
## $ HE : int 6 6 5 3 6 7 7 6 8 3 ...
## $ math: int 6 11 3 6 11 8 9 9 9 7 ...
Show that X is correlated with Y. Regress Y on X to estimate and test the path c. This step establishes that there is an effect that may be mediated.
Show that X is correlated with M. Regress M on X to estimate and test path a. This step essentially involves treating the mediator as if it were an outcome variable.
Show that M affects Y. Regress Y on both X and M to estimate and test path b. Note that it is not sufficient just to correlate the mediator with the outcome; the mediator and the outcome may be correlated because they are both caused by the input variable X. Thus, the input variable X must be controlled in establishing the effect of the mediator on the outcome.
To establish that M completely mediates the X-Y relationship, the effect of X on Y controlling for M (path c) should be zero. The effects in both Steps 3 and 4 are estimated in the same equation.
比較三個迴歸式子的影響
lm.1<- lm(math~ME,data=nlsy)
lm.2<- lm(HE~ME,data=nlsy)
lm.3<- lm(math~HE+ME,data=nlsy)
stargazer(lm.1,lm.2,lm.3,type="text")
##
## ===========================================================================================
## Dependent variable:
## -----------------------------------------------------------------------
## math HE math
## (1) (2) (3)
## -------------------------------------------------------------------------------------------
## HE 0.465***
## (0.143)
##
## ME 0.528*** 0.139*** 0.463***
## (0.120) (0.043) (0.120)
##
## Constant 6.180*** 4.110*** 4.270***
## (1.370) (0.491) (1.480)
##
## -------------------------------------------------------------------------------------------
## Observations 371 371 371
## R2 0.050 0.028 0.076
## Adjusted R2 0.047 0.025 0.071
## Residual Std. Error 4.620 (df = 369) 1.660 (df = 369) 4.560 (df = 368)
## F Statistic 19.300*** (df = 1; 369) 10.500*** (df = 1; 369) 15.200*** (df = 2; 368)
## ===========================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
sem進行路徑分析的方式來檢驗model <- "
math~ME
HE~ME
math~HE"
path.1<- sem(model,data = nlsy)
summary(path.1)
## lavaan 0.6-3 ended normally after 21 iterations
##
## Optimization method NLMINB
## Number of free parameters 5
##
## Number of observations 371
##
## Estimator ML
## Model Fit Test Statistic 0.000
## Degrees of freedom 0
## Minimum Function Value 0.0000000000000
##
## Parameter Estimates:
##
## Information Expected
## Information saturated (h1) model Structured
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## math ~
## ME 0.463 0.120 3.869 0.000
## HE ~
## ME 0.139 0.043 3.249 0.001
## math ~
## HE 0.465 0.143 3.252 0.001
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .math 20.621 1.514 13.620 0.000
## .HE 2.724 0.200 13.620 0.000
semPaths(path.1,whatLabels="par",
rotation = 2,
mar = c(3, 3, 3, 3), # 邊界
intercepts = TRUE, # 殘差顯示
edge.label.cex = 0.8, # label 的字體大小
sizeMan = 7, # measurement variable 顯示大小
sizeLat = 10, # latent variable 顯示大小
exoCov = FALSE)