library(rio)
# Load the Project 2a data set from a CSV file.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
PJ2a <- read.csv("/Users/Lorraine/Desktop/Project2a.csv")
# Multiple regression of Jobsat on PosAffect and WSE.
reg <- lm(Jobsat ~ PosAffect + WSE, data = PJ2a)
# Print the coefficient table, residual summary, R-squared and overall F test.
summary(reg)
##
## Call:
## lm(formula = Jobsat ~ PosAffect + WSE, data = PJ2a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.552 -4.218 -1.819 3.533 16.430
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.20956 0.66372 1.822 0.0691 .
## PosAffect 0.07090 0.10984 0.645 0.5190
## WSE 0.30475 0.05086 5.991 4.33e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.574 on 440 degrees of freedom
## Multiple R-squared: 0.09102, Adjusted R-squared: 0.08689
## F-statistic: 22.03 on 2 and 440 DF, p-value: 7.613e-10
# ---- Casewise diagnostics for `reg`, stored as extra columns of PJ2a ----

# Residuals in three scalings (raw, standardized, studentized); these feed the
# normality checks.
PJ2a$residuals <- residuals(reg)
PJ2a$standardized.residuals <- rstandard(reg)
PJ2a$studentized.residuals <- rstudent(reg)

# Overall influence of each case on the fitted model.
PJ2a$cooks.distance <- cooks.distance(reg)

# Change in each regression coefficient when the case is dropped
# (one column per coefficient, kept together as a matrix column).
PJ2a$dfbeta <- dfbeta(reg)

# Change in the case's own fitted value when it is dropped.
PJ2a$dffit <- dffits(reg)

# Leverage: distance of a case from the mean of the predictors, plus the
# covariance ratio for the same case.
PJ2a$leverage <- hatvalues(reg)
PJ2a$covariance.ratios <- covratio(reg)

# Print, case by case, whether the standardized residual lies beyond +/- 2.00;
# under normality roughly 95% of cases should fall inside that band.
abs(PJ2a$standardized.residuals) > 2
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [45] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## [100] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [144] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [155] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [166] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## [177] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [188] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [199] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [210] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [232] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [243] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## [254] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## [276] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [287] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [298] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [309] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
## [320] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [331] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [342] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
## [353] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [364] FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE
## [375] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
## [386] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [397] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [408] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [419] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [430] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [441] FALSE FALSE FALSE
# Flag every case whose standardized residual exceeds 2 in absolute value and
# count the flagged cases (a count near or below 5% of the sample is normal).
PJ2a$large.residual <- abs(PJ2a$standardized.residuals) > 2
sum(PJ2a$large.residual)
## [1] 17
# List the model variables and the standardized residual for the flagged cases
# only, to check that none is extreme.
PJ2a[PJ2a$large.residual, c("WSE", "Jobsat", "PosAffect", "standardized.residuals")] ###In the normal range
## WSE Jobsat PosAffect standardized.residuals
## 26 7.000000 18 2 2.610105
## 44 15.695698 18 9 2.050092
## 45 5.000000 14 1 2.015813
## 95 11.000000 19 5 2.529884
## 130 6.000000 15 0 2.155506
## 174 10.000000 17 3 2.251086
## 186 20.000000 20 8 2.186716
## 219 9.033865 19 1 2.692821
## 249 21.000000 21 6 2.336502
## 272 2.000000 18 0 2.920053
## 291 10.000000 20 5 2.764618
## 314 20.000000 20 4 2.236286
## 347 17.000000 20 4 2.396676
## 365 4.000000 19 2 2.957546
## 371 8.000000 15 1 2.029919
## 373 20.000000 19 8 2.006417
## 380 8.000000 18 6 2.505142
# Cook's distance, leverage and covariance ratio for the flagged cases.
# NOTE(review): a covariance ratio is usually judged by its distance from 1
# (a common cut-off is |ratio - 1| > 3(k + 1)/n), not by being below 1 --
# confirm which criterion is intended here.
PJ2a[PJ2a$large.residual, c("cooks.distance", "leverage", "covariance.ratios")] ###covariance ratios are all below 1, looks fine
## cooks.distance leverage covariance.ratios
## 26 0.010510374 0.004607000 0.9652486
## 44 0.014401842 0.010175389 0.9883284
## 45 0.009822109 0.007199259 0.9863114
## 95 0.005483886 0.002563855 0.9660247
## 130 0.013764151 0.008809061 0.9839570
## 174 0.004721753 0.002787585 0.9751798
## 186 0.015992891 0.009934075 0.9841554
## 219 0.014085830 0.005793825 0.9634681
## 249 0.016478978 0.008974373 0.9786178
## 272 0.033713080 0.011722416 0.9607032
## 291 0.007210587 0.002822242 0.9579901
## 314 0.014379409 0.008552194 0.9812992
## 347 0.009549662 0.004962835 0.9727572
## 365 0.019896416 0.006777659 0.9544565
## 371 0.008145994 0.005895761 0.9846313
## 373 0.013464320 0.009934075 0.9892931
## 380 0.011074215 0.005265947 0.9694839
# Outlier/influence: DFBETA (one column per coefficient, because `dfbeta` is
# stored as a matrix column) and DFFITS for the flagged cases.
PJ2a[PJ2a$large.residual, c("dfbeta", "dffit")] ###Data are in the normal range
## dfbeta.(Intercept) dfbeta.PosAffect dfbeta.WSE dffit
## 26 1.083995e-01 -8.519158e-03 -3.454187e-03 0.1787574
## 44 -5.919727e-02 1.839310e-02 6.711117e-04 0.2086218
## 45 1.100637e-01 -9.585496e-03 -3.858477e-03 0.1722597
## 95 2.609879e-02 4.740411e-03 -1.236315e-03 0.1290603
## 130 1.206201e-01 -1.568365e-02 -2.385028e-03 0.2040546
## 174 5.540910e-02 -4.770443e-03 -6.071028e-04 0.1195731
## 186 -8.848149e-02 1.162973e-02 5.856774e-03 0.2199899
## 219 1.045843e-01 -1.651549e-02 -9.413908e-05 0.2070457
## 249 -7.923120e-02 1.458442e-03 8.947121e-03 0.2234821
## 272 2.093081e-01 -1.726608e-02 -8.689250e-03 0.3207858
## 291 3.928403e-02 6.126227e-03 -2.636231e-03 0.1482030
## 314 -4.375165e-02 -7.560158e-03 9.052813e-03 0.2086504
## 347 -1.879103e-02 -5.626620e-03 6.338972e-03 0.1701821
## 365 1.575644e-01 -6.623850e-03 -8.050581e-03 0.2464985
## 371 8.702312e-02 -1.173174e-02 -1.047960e-03 0.1568851
## 373 -8.118600e-02 1.067083e-02 5.373871e-03 0.2016762
## 380 4.211551e-02 1.271009e-02 -5.581079e-03 0.1833760
# Independence assumption: Durbin-Watson test for first-order autocorrelation
# of the residuals of `reg`.
library(car)
## Loading required package: carData
durbinWatsonTest(reg)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.03107735 2.059358 0.536
## Alternative hypothesis: rho != 0
# Same test on the same model: the statistic is identical to the call above and
# only the p-value differs.
# NOTE(review): presumably dwt() is car's short alias for durbinWatsonTest()
# and the p-value is simulated, which would explain the difference -- confirm.
dwt(reg) ###result was very close to 2, not significant
## lag Autocorrelation D-W Statistic p-value
## 1 -0.03107735 2.059358 0.532
## Alternative hypothesis: rho != 0
# Multicollinearity: variance inflation factor for each predictor of `reg`.
vif(reg) ### less than 10, good
## PosAffect WSE
## 1.131279 1.131279
# ---- Graphical checks of the residual assumptions for `reg` ----
# Keep the fitted values next to the residuals so they can be plotted together.
PJ2a$fitted <- reg[["fitted.values"]]

# Histogram of the studentized residuals (should look roughly bell-shaped).
hist(PJ2a$studentized.residuals)

# Normal Q-Q plot with a reference line (points should follow the line).
qqnorm(PJ2a$studentized.residuals, pch = 1, frame = FALSE)
qqline(PJ2a$studentized.residuals, col = "steelblue", lwd = 2)
library(ggplot2)

# Studentized residuals against fitted values: the vertical spread of the
# points should stay constant across the range of fitted values.
scatter <- ggplot(PJ2a, aes(x = fitted, y = studentized.residuals))
scatter +
  geom_point() +
  geom_smooth(method = "lm", colour = "Blue") +
  labs(x = "Fitted Values", y = "Studentized Residuals")
# Mediation step 1 (total effect, path c): outcome Jobsat regressed on the
# predictor PosAffect alone.
reg1 <- lm(Jobsat ~ PosAffect, data=PJ2a)
summary(reg1)
##
## Call:
## lm(formula = Jobsat ~ PosAffect, data = PJ2a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.417 -4.532 -2.647 3.944 15.468
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.7613 0.5288 7.113 4.62e-12 ***
## PosAffect 0.2951 0.1073 2.751 0.00619 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.79 on 441 degrees of freedom
## Multiple R-squared: 0.01687, Adjusted R-squared: 0.01464
## F-statistic: 7.566 on 1 and 441 DF, p-value: 0.006194
# Mediation step 2 (path a): the mediator regressed on the predictor.
# The mediation models in this script (the mediate() call and the lavaan path
# "WSE ~ a * PosAffect") treat PosAffect as X and WSE as the mediator, so WSE
# must be the outcome here; the original PosAffect ~ WSE had the direction
# reversed and therefore did not estimate path a.
# NOTE: the "##" output recorded below was produced by the reversed model and
# needs to be regenerated.
reg2 <- lm(WSE ~ PosAffect, data = PJ2a)
summary(reg2)
##
## Call:
## lm(formula = PosAffect ~ WSE, data = PJ2a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7129 -2.0819 -0.2397 1.8913 5.9688
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.40020 0.26407 9.089 < 2e-16 ***
## WSE 0.15775 0.02073 7.609 1.69e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.417 on 441 degrees of freedom
## Multiple R-squared: 0.116, Adjusted R-squared: 0.114
## F-statistic: 57.89 on 1 and 441 DF, p-value: 1.69e-13
# Mediation step 3 (paths b and c'): Jobsat regressed on the predictor and the
# mediator together. Same formula and data as `reg` fitted at the top of the
# script, so the summary is identical to that one.
reg3 <- lm(Jobsat ~ PosAffect + WSE, data=PJ2a)
summary(reg3)
##
## Call:
## lm(formula = Jobsat ~ PosAffect + WSE, data = PJ2a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.552 -4.218 -1.819 3.533 16.430
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.20956 0.66372 1.822 0.0691 .
## PosAffect 0.07090 0.10984 0.645 0.5190
## WSE 0.30475 0.05086 5.991 4.33e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.574 on 440 degrees of freedom
## Multiple R-squared: 0.09102, Adjusted R-squared: 0.08689
## F-statistic: 22.03 on 2 and 440 DF, p-value: 7.613e-10
library(mediation)
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: mvtnorm
## Loading required package: sandwich
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
## mediation: Causal Mediation Analysis
## Version: 4.5.0
library(foreign)
library(lavaan)
## This is lavaan 0.6-5
## lavaan is BETA software! Please report any bugs.
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:lavaan':
##
## cor2cov
## The following object is masked from 'package:mediation':
##
## mediate
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:car':
##
## logit
library(multilevel)
## Loading required package: nlme
# Mediation model: X = PosAffect, mediator = WSE (marked by the parentheses),
# Y = Jobsat.
# Both 'mediation' and 'psych' export mediate(); the formula interface with a
# parenthesised mediator is the psych one, so call it by namespace instead of
# relying on the order in which the packages were attached.
# The confidence interval of the indirect effect (ab) is bootstrapped, so the
# RNG seed is fixed to make the reported interval reproducible.
set.seed(2020)
med.out <- psych::mediate(Jobsat ~ PosAffect + (WSE), data = PJ2a)
summary(med.out)
## Call: mediate(y = Jobsat ~ PosAffect + (WSE), data = PJ2a)
##
## Total effect estimates (c)
## Jobsat se t df Prob
## PosAffect 0.3 0.11 2.75 441 0.00619
##
## Direct effect estimates (c')
## Jobsat se t df Prob
## PosAffect 0.07 0.11 0.65 440 5.19e-01
## WSE 0.30 0.05 5.99 440 4.33e-09
##
## R = 0.3 R2 = 0.09 F = 22.03 on 2 and 440 DF p-value: 7.61e-10
##
## 'a' effect estimates
## WSE se t df Prob
## PosAffect 0.74 0.1 7.61 441 1.69e-13
##
## 'b' effect estimates
## Jobsat se t df Prob
## WSE 0.3 0.05 5.99 440 4.33e-09
##
## 'ab' effect estimates
## Jobsat boot sd lower upper
## PosAffect 0.22 0.23 0.05 0.14 0.33
# Short-form report of the mediation result with four significant digits
# (total, direct and indirect effects plus the bootstrapped interval).
print(med.out, digits = 4)
##
## Mediation/Moderation Analysis
## Call: mediate(y = Jobsat ~ PosAffect + (WSE), data = PJ2a)
##
## The DV (Y) was Jobsat . The IV (X) was PosAffect . The mediating variable(s) = WSE .
##
## Total effect(c) of PosAffect on Jobsat = 0.2951 S.E. = 0.1073 t = 2.7506 df= 441 with p = 0.006194
## Direct effect (c') of PosAffect on Jobsat removing WSE = 0.0709 S.E. = 0.1098 t = 0.6454 df= 440 with p = 0.519
## Indirect effect (ab) of PosAffect on Jobsat through WSE = 0.2242
## Mean bootstrapped indirect effect = 0.2252 with standard error = 0.049 Lower CI = 0.138 Upper CI = 0.3275
## R = 0.3017 R2 = 0.091 F = 22.0305 on 2 and 440 DF p-value: 7.6135e-10
##
## To see the longer output, specify short = FALSE in the print statement or ask for the summary
Question 1. The total effect was 0.2951, the direct effect was 0.0709, and the indirect effect was 0.2242. Question 2. The indirect effect was significant: its bootstrapped confidence interval [0.138, 0.3275] does not include zero. The direct effect was not significant, with a p-value of 0.519. Question 3. Together, positive affect and work self-efficacy explain 9.1% of the variance in job satisfaction. R-square = 0.091, F(2, 440) = 22.0305, p < 0.05 (overall model p-value = 7.6135e-10).
# Sobel test of the indirect effect of PosAffect on Jobsat through WSE.
# multilevel::sobel() takes its arguments in the order (pred, med, out); the
# original positional call passed (Jobsat, PosAffect, WSE), which treated the
# outcome as the predictor and tested the wrong mediation path. The arguments
# are named here so the roles cannot be mixed up.
# NOTE: the "##" output recorded below came from the original argument order
# and needs to be regenerated.
multilevel::sobel(pred = PJ2a$PosAffect, med = PJ2a$WSE, out = PJ2a$Jobsat)
## $`Mod1: Y~X`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.0421152 0.33155121 30.288278 8.546196e-110
## pred 0.2853842 0.04316964 6.610761 1.106763e-10
##
## $`Mod2: Y~X+M`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.4423561 0.48435720 15.365429 5.811736e-43
## pred 0.2475116 0.04131125 5.991384 4.332008e-09
## med 0.6625965 0.09386431 7.059088 6.567835e-12
##
## $`Mod3: M~X`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.92359321 0.15959787 24.584246 1.111794e-84
## pred 0.05715788 0.02078045 2.750561 6.194155e-03
##
## $Indirect.Effect
## [1] 0.03787261
##
## $SE
## [1] 0.01477738
##
## $z.value
## [1] 2.562878
##
## $N
## [1] 443
library(powerMediation)
# Sobel test computed from summary statistics rather than from the raw data.
# NOTE(review): presumably theta.1.hat / sigma.theta1 are the a-path estimate
# (PosAffect -> WSE) and its standard error, and lambda.hat / sigma.lambda the
# b-path estimate (WSE -> Jobsat) and its standard error -- confirm against the
# powerMediation documentation.
# NOTE(review): the values are hard-coded; sigma.lambda = 0.0476 differs from
# the WSE standard error (0.05086) printed by summary(reg) -- confirm its source.
testMediation.Sobel(theta.1.hat=0.7356, lambda.hat=.3048,
sigma.theta1=0.0967, sigma.lambda=0.0476, alpha=0.05)
## $pval
## [1] 9.641632e-07
##
## $CI.low
## [1] 0.1345065
##
## $CI.upp
## [1] 0.3139153
# Path model for the same mediation in lavaan syntax:
#   a  = PosAffect -> WSE, b = WSE -> Jobsat, cp = direct PosAffect -> Jobsat;
#   ab (indirect effect) and total are defined from those labelled paths.
model <- "WSE ~ a * PosAffect
Jobsat ~ b * WSE
Jobsat ~ cp * PosAffect
ab := a*b
total:= cp + ab"
# Standard errors and intervals come from 500 bootstrap resamples. Without a
# fixed seed every run gives slightly different intervals, so the RNG is seeded
# to keep the reported confidence limits reproducible.
set.seed(2020)
fit <- lavaan::sem(model = model, data = PJ2a, se = "boot", bootstrap = 500)
## Warning in lav_model_estimate(lavmodel = lavmodel, lavpartable =
## lavpartable, : lavaan WARNING: the optimizer warns that a solution has NOT
## been found!
## Warning in lav_model_estimate(lavmodel = lavmodel, lavpartable =
## lavpartable, : lavaan WARNING: the optimizer warns that a solution has NOT
## been found!
## Warning in bootstrap.internal(object = NULL, lavmodel. = lavmodel,
## lavsamplestats. = lavsamplestats, : lavaan WARNING: only 497 bootstrap
## draws were successful
# Parameter table for the fitted path model, including the defined parameters
# ab and total, with confidence limits of type "bca.simple" taken from the
# bootstrap draws.
lavaan::parameterEstimates(fit, boot.ci.type = "bca.simple")
## lhs op rhs label est se z pvalue ci.lower
## 1 WSE ~ PosAffect a 0.736 0.099 7.428 0.000 0.519
## 2 Jobsat ~ WSE b 0.305 0.051 5.990 0.000 0.197
## 3 Jobsat ~ PosAffect cp 0.071 0.115 0.616 0.538 -0.133
## 4 WSE ~~ WSE 27.108 1.574 17.220 0.000 24.270
## 5 Jobsat ~~ Jobsat 30.860 1.886 16.358 0.000 27.312
## 6 PosAffect ~~ PosAffect 6.576 0.000 NA NA 6.576
## 7 ab := a*b ab 0.224 0.048 4.715 0.000 0.136
## 8 total := cp+ab total 0.295 0.116 2.537 0.011 0.089
## ci.upper
## 1 0.906
## 2 0.404
## 3 0.320
## 4 30.297
## 5 34.835
## 6 6.576
## 7 0.323
## 8 0.568
# Standard lavaan report for the path model: estimator, number of bootstrap
# draws, regressions, variances and the defined parameters.
summary(fit)
## lavaan 0.6-5 ended normally after 20 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of free parameters 5
##
## Number of observations 443
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Bootstrap
## Number of requested bootstrap draws 500
## Number of successful bootstrap draws 497
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## WSE ~
## PosAffect (a) 0.736 0.099 7.428 0.000
## Jobsat ~
## WSE (b) 0.305 0.051 5.990 0.000
## PosAffect (cp) 0.071 0.115 0.616 0.538
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .WSE 27.108 1.574 17.220 0.000
## .Jobsat 30.860 1.886 16.358 0.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|)
## ab 0.224 0.048 4.715 0.000
## total 0.295 0.116 2.537 0.011
95% CI from Sobel’s test: [0.1345, 0.3139]. 95% CI from bootstrap: [0.141, 0.343]. The results are different because Sobel’s test is more conservative than the bootstrap: it assumes that the indirect effect is normally distributed, whereas the bootstrap treats the sample as the population and resamples from it with replacement 500 times.
# Load the Project 2b data set from a CSV file.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
PJ2b <- read.csv("/Users/Lorraine/Desktop/Project2b.csv")
# Multiple regression of JOBSAT on TRAIN and AGE (main effects only).
reg4 <- lm(JOBSAT ~ TRAIN + AGE, data = PJ2b)
# Print the coefficient table, residual summary, R-squared and overall F test.
summary(reg4)
##
## Call:
## lm(formula = JOBSAT ~ TRAIN + AGE, data = PJ2b)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.69903 -0.56305 0.02654 0.56258 1.65967
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.706040 0.207331 8.229 2.39e-15 ***
## TRAIN 0.392959 0.052357 7.505 3.68e-13 ***
## AGE 0.004504 0.002569 1.754 0.0802 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7112 on 421 degrees of freedom
## Multiple R-squared: 0.1239, Adjusted R-squared: 0.1198
## F-statistic: 29.78 on 2 and 421 DF, p-value: 8.009e-13
# ---- Casewise diagnostics for `reg4`, stored as extra columns of PJ2b ----

# Residuals in three scalings (raw, standardized, studentized); these feed the
# normality checks.
PJ2b$residuals <- residuals(reg4)
PJ2b$standardized.residuals <- rstandard(reg4)
PJ2b$studentized.residuals <- rstudent(reg4)

# Overall influence of each case on the fitted model.
PJ2b$cooks.distance <- cooks.distance(reg4)

# Change in each regression coefficient when the case is dropped
# (one column per coefficient, kept together as a matrix column).
PJ2b$dfbeta <- dfbeta(reg4)

# Change in the case's own fitted value when it is dropped.
PJ2b$dffit <- dffits(reg4)

# Leverage: distance of a case from the mean of the predictors, plus the
# covariance ratio for the same case.
PJ2b$leverage <- hatvalues(reg4)
PJ2b$covariance.ratios <- covratio(reg4)

# Print, case by case, whether the standardized residual lies beyond +/- 2.00;
# under normality roughly 95% of cases should fall inside that band.
abs(PJ2b$standardized.residuals) > 2
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [144] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [155] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## [166] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [177] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [188] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [199] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [210] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [232] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [243] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## [254] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [276] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [287] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [298] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [309] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [320] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [331] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [342] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [353] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [364] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [375] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [386] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [397] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [408] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [419] FALSE FALSE FALSE FALSE FALSE FALSE
# Flag every case whose standardized residual exceeds 2 in absolute value and
# count the flagged cases (a count near or below 5% of the sample is normal).
PJ2b$large.residual <- abs(PJ2b$standardized.residuals) > 2
sum(PJ2b$large.residual)
## [1] 8
# List the model variables and the standardized residual for the flagged cases
# only, to check that none is extreme.
PJ2b[PJ2b$large.residual, c("AGE", "JOBSAT", "TRAIN", "standardized.residuals")] ###In the normal range
## AGE JOBSAT TRAIN standardized.residuals
## 23 64 4.463800 2.500000 2.104560
## 36 23 4.799315 4.000000 2.002922
## 53 18 4.850316 3.666667 2.292933
## 90 28 5.063672 4.000000 2.342067
## 162 29 4.937920 4.000000 2.157882
## 224 25 1.691453 4.000000 -2.399042
## 250 21 5.112074 4.500000 2.186520
## 379 29 1.845453 3.833333 -2.111821
# Cook's distance, leverage and covariance ratio for the flagged cases.
# NOTE(review): a covariance ratio is usually judged by its distance from 1
# (a common cut-off is |ratio - 1| > 3(k + 1)/n), not by being below 1 --
# confirm which criterion is intended here.
PJ2b[PJ2b$large.residual, c("cooks.distance", "leverage", "covariance.ratios")] ###covariance ratios are all below 1, looks fine
## cooks.distance leverage covariance.ratios
## 23 0.019189493 0.012830795 0.9883871
## 36 0.012463724 0.009234467 0.9877632
## 53 0.018044687 0.010191520 0.9798827
## 90 0.013130197 0.007129949 0.9752568
## 162 0.010606940 0.006787321 0.9807660
## 224 0.016084606 0.008314384 0.9745185
## 250 0.024617097 0.015212269 0.9882700
## 379 0.008586094 0.005742508 0.9811244
# Outlier/influence: DFBETA (one column per coefficient, because `dfbeta` is
# stored as a matrix column) and DFFITS for the flagged cases.
PJ2b[PJ2b$large.residual, c("dfbeta", "dffit")] ###Data are in the normal range
## dfbeta.(Intercept) dfbeta.TRAIN dfbeta.AGE dffit
## 23 0.0082867682 -0.0069256778 0.0004430599 0.2409199
## 36 0.0006351014 0.0051450626 -0.0003478411 0.1940650
## 53 0.0149893753 0.0029442610 -0.0005044760 0.2338556
## 90 -0.0037462843 0.0059970544 -0.0002972706 0.1995390
## 162 -0.0042779666 0.0055221192 -0.0002537564 0.1791655
## 224 0.0010796725 -0.0061544924 0.0003717380 -0.2209218
## 250 -0.0117629141 0.0098849359 -0.0004229630 0.2729874
## 379 -0.0003465190 -0.0040408154 0.0002478244 -0.1611590
# Independence assumption: Durbin-Watson test for first-order autocorrelation
# of the residuals of `reg4`.
durbinWatsonTest(reg4)
## lag Autocorrelation D-W Statistic p-value
## 1 0.01693611 1.963104 0.688
## Alternative hypothesis: rho != 0
# Same test on the same model: the statistic is identical to the call above and
# only the p-value differs.
# NOTE(review): presumably dwt() is car's short alias for durbinWatsonTest()
# and the p-value is simulated, which would explain the difference -- confirm.
dwt(reg4) ###result was very close to 2, not significant
## lag Autocorrelation D-W Statistic p-value
## 1 0.01693611 1.963104 0.75
## Alternative hypothesis: rho != 0
# Multicollinearity: variance inflation factor for each predictor of `reg4`.
vif(reg4) ### less than 10, good
## TRAIN AGE
## 1.000033 1.000033
# ---- Graphical checks of the residual assumptions for `reg4` ----
# Keep the fitted values next to the residuals so they can be plotted together.
PJ2b$fitted <- reg4[["fitted.values"]]

# Histogram of the studentized residuals (should look roughly bell-shaped).
hist(PJ2b$studentized.residuals)

# Normal Q-Q plot with a reference line (points should follow the line).
qqnorm(PJ2b$studentized.residuals, pch = 1, frame = FALSE)
qqline(PJ2b$studentized.residuals, col = "steelblue", lwd = 2)

# Studentized residuals against fitted values: the vertical spread of the
# points should stay constant across the range of fitted values.
scatter <- ggplot(PJ2b, aes(x = fitted, y = studentized.residuals))
scatter +
  geom_point() +
  geom_smooth(method = "lm", colour = "Blue") +
  labs(x = "Fitted Values", y = "Studentized Residuals")
library(jtools)
library(QuantPsyc)
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:car':
##
## logit
##
## Attaching package: 'QuantPsyc'
## The following object is masked from 'package:Matrix':
##
## norm
## The following object is masked from 'package:base':
##
## norm
library(interactions)
# Hierarchical moderation test on the raw (uncentered) variables.
# Step 1: main effects of TRAIN and AGE only.
reg4a <- lm(JOBSAT ~ TRAIN + AGE, data = PJ2b)
# Step 2: main effects plus the TRAIN x AGE interaction.
reg4b <- lm(JOBSAT ~ TRAIN + AGE + TRAIN * AGE, data = PJ2b)
# Report the step-1 model.
summary(reg4a)
##
## Call:
## lm(formula = JOBSAT ~ TRAIN + AGE, data = PJ2b)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.69903 -0.56305 0.02654 0.56258 1.65967
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.706040 0.207331 8.229 2.39e-15 ***
## TRAIN 0.392959 0.052357 7.505 3.68e-13 ***
## AGE 0.004504 0.002569 1.754 0.0802 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7112 on 421 degrees of freedom
## Multiple R-squared: 0.1239, Adjusted R-squared: 0.1198
## F-statistic: 29.78 on 2 and 421 DF, p-value: 8.009e-13
# Report the step-2 model, which adds the TRAIN:AGE interaction term.
summary(reg4b)
##
## Call:
## lm(formula = JOBSAT ~ TRAIN + AGE + TRAIN * AGE, data = PJ2b)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83777 -0.59433 0.04039 0.58697 1.54724
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.013603 0.569096 -0.024 0.980941
## TRAIN 0.912337 0.168492 5.415 1.03e-07 ***
## AGE 0.046762 0.013290 3.518 0.000481 ***
## TRAIN:AGE -0.012756 0.003938 -3.239 0.001294 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7033 on 420 degrees of freedom
## Multiple R-squared: 0.1453, Adjusted R-squared: 0.1392
## F-statistic: 23.8 on 3 and 420 DF, p-value: 3.043e-14
# F test comparing the nested models: does adding the interaction term
# significantly reduce the residual sum of squares?
anova(reg4a, reg4b)
## Analysis of Variance Table
##
## Model 1: JOBSAT ~ TRAIN + AGE
## Model 2: JOBSAT ~ TRAIN + AGE + TRAIN * AGE
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 421 212.93
## 2 420 207.74 1 5.1899 10.493 0.001294 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model 1 was significant, R-squared = 0.1239, F(2, 421) = 29.78, p < 0.05. However, only training contributed significantly to the model. Model 2 was also significant, R-squared = 0.1453, F(3, 420) = 23.8, p < 0.05. The ANOVA model comparison showed that the interaction between training and age was significant, suggesting that age is a moderator of the relation between training and job satisfaction.
# Mean-center both predictors (subtract the sample mean from every value) so
# that each main effect is evaluated at the mean of the other predictor.
PJ2b$mcTRAIN <- PJ2b$TRAIN - mean(PJ2b$TRAIN)
PJ2b$mcAGE <- PJ2b$AGE - mean(PJ2b$AGE)

# Moderation model on the centered predictors: both main effects and their
# interaction.
reg5 <- lm(JOBSAT ~ mcTRAIN * mcAGE, data = PJ2b)
summary(reg5)
##
## Call:
## lm(formula = JOBSAT ~ mcTRAIN * mcAGE, data = PJ2b)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83777 -0.59433 0.04039 0.58697 1.54724
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.207101 0.034155 93.897 < 2e-16 ***
## mcTRAIN 0.382284 0.051881 7.368 9.23e-13 ***
## mcAGE 0.004133 0.002543 1.626 0.10480
## mcTRAIN:mcAGE -0.012756 0.003938 -3.239 0.00129 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7033 on 420 degrees of freedom
## Multiple R-squared: 0.1453, Adjusted R-squared: 0.1392
## F-statistic: 23.8 on 3 and 420 DF, p-value: 3.043e-14
# Hierarchical moderation test repeated on the mean-centered variables.
# Step 1: centered main effects only.
reg5a <- lm(JOBSAT ~ mcTRAIN + mcAGE, data = PJ2b)
# Step 2: centered main effects plus their interaction.
reg5b <- lm(JOBSAT ~ mcTRAIN + mcAGE + mcTRAIN * mcAGE, data = PJ2b)
# Report the step-1 model.
summary(reg5a)
##
## Call:
## lm(formula = JOBSAT ~ mcTRAIN + mcAGE, data = PJ2b)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.69903 -0.56305 0.02654 0.56258 1.65967
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.206448 0.034538 92.839 < 2e-16 ***
## mcTRAIN 0.392959 0.052357 7.505 3.68e-13 ***
## mcAGE 0.004504 0.002569 1.754 0.0802 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7112 on 421 degrees of freedom
## Multiple R-squared: 0.1239, Adjusted R-squared: 0.1198
## F-statistic: 29.78 on 2 and 421 DF, p-value: 8.009e-13
# Report the step-2 centered model, which adds the mcTRAIN:mcAGE interaction.
summary(reg5b)
##
## Call:
## lm(formula = JOBSAT ~ mcTRAIN + mcAGE + mcTRAIN * mcAGE, data = PJ2b)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83777 -0.59433 0.04039 0.58697 1.54724
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.207101 0.034155 93.897 < 2e-16 ***
## mcTRAIN 0.382284 0.051881 7.368 9.23e-13 ***
## mcAGE 0.004133 0.002543 1.626 0.10480
## mcTRAIN:mcAGE -0.012756 0.003938 -3.239 0.00129 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7033 on 420 degrees of freedom
## Multiple R-squared: 0.1453, Adjusted R-squared: 0.1392
## F-statistic: 23.8 on 3 and 420 DF, p-value: 3.043e-14
# F test comparing the nested centered models: does adding the interaction
# term significantly reduce the residual sum of squares?
anova(reg5a, reg5b)
## Analysis of Variance Table
##
## Model 1: JOBSAT ~ mcTRAIN + mcAGE
## Model 2: JOBSAT ~ mcTRAIN + mcAGE + mcTRAIN * mcAGE
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 421 212.93
## 2 420 207.74 1 5.1899 10.493 0.001294 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
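The results of model 1 are the same for the centered and the raw data, except for the intercept. For model 2, the intercept and the regression coefficients for both TRAIN and AGE changed. However, the regression coefficient of the interaction remained the same, as did R-square and the F-statistic. In addition, the main effect of Age was no longer significant in the centered model (p = 0.105), although the interaction, and therefore the moderation by Age, remained significant (p = 0.00129). Centering allows an easier interpretation, because each main effect is then the effect at the mean of the other predictor.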
# Probe the interaction in `reg5`, with mcTRAIN as the focal predictor and
# mcAGE as the moderator: reports the Johnson-Neyman interval and the simple
# slopes of mcTRAIN at -1 SD, the mean and +1 SD of mcAGE.
# The moderator values in the output are on the centered scale (mcAGE).
probe_interaction(reg5, mcTRAIN, mcAGE)
## JOHNSON-NEYMAN INTERVAL
##
## When mcAGE is OUTSIDE the interval [16.68, 79.17], the slope of
## mcTRAIN is p < .05.
##
## Note: The range of observed values of mcAGE is [-23.55, 22.45]
##
## SIMPLE SLOPES ANALYSIS
##
## Slope of mcTRAIN when mcAGE = -13.46 (- 1 SD):
##
## Est. S.E. t val. p
## ------ ------ -------- ------
## 0.55 0.07 7.72 0.00
##
## Slope of mcTRAIN when mcAGE = 0.00 (Mean):
##
## Est. S.E. t val. p
## ------ ------ -------- ------
## 0.38 0.05 7.37 0.00
##
## Slope of mcTRAIN when mcAGE = 13.46 (+ 1 SD):
##
## Est. S.E. t val. p
## ------ ------ -------- ------
## 0.21 0.08 2.75 0.01
The slopes all have p-values < 0.05, so they are all significant.
A moderation analysis was run to examine whether age was a moderator of the relation between training level and job satisfaction. Age and training level were centered for easier interpretation. Age and training level were entered in the first step of the regression analysis. In the second step, the interaction term between age and training level was entered, and it explained a significant increase in variance in job satisfaction, ΔR² = 0.0214, F(1, 420) = 10.493, p < 0.05. Thus, age was a significant moderator of the relation between training level and job satisfaction. Results of a simple slopes analysis revealed that the unstandardized simple slope for individuals 1 SD below the mean age was B = 0.55, p < 0.01, the unstandardized simple slope for individuals at the mean age was B = 0.38, p < 0.05, and the unstandardized simple slope for individuals 1 SD above the mean age was B = 0.21, p < 0.05.