library(foreign)
df <- read.csv("data1_1.csv")model1 <- lm(grade ~ books, data = df)
summary(model1)##
## Call:
## lm(formula = grade ~ books, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.025 -12.616 -1.181 10.425 33.450
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.075 4.035 12.905 1.83e-15 ***
## books 5.738 1.647 3.483 0.00127 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.73 on 38 degrees of freedom
## Multiple R-squared: 0.242, Adjusted R-squared: 0.222
## F-statistic: 12.13 on 1 and 38 DF, p-value: 0.001265
F-statistic is significant, so …
The model explains … per cent of the variance of “grade”.
For each extra book read by the student, the estimated grade changes by …
When books = 0, the estimated grade = …
model2 <- lm(grade ~ attend, data = df)
summary(model2)##
## Call:
## lm(formula = grade ~ attend, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.777 -10.904 2.021 12.430 31.755
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.998 8.169 4.529 5.71e-05 ***
## attend 1.883 0.555 3.393 0.00163 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.83 on 38 degrees of freedom
## Multiple R-squared: 0.2325, Adjusted R-squared: 0.2123
## F-statistic: 11.51 on 1 and 38 DF, p-value: 0.001629
model3 <- lm(grade ~ books + attend, data = df)
summary(model3)##
## Call:
## lm(formula = grade ~ books + attend, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.802 -13.374 0.060 9.173 32.295
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.379 7.745 4.827 2.41e-05 ***
## books 4.037 1.753 2.303 0.0270 *
## attend 1.284 0.587 2.187 0.0352 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.05 on 37 degrees of freedom
## Multiple R-squared: 0.3287, Adjusted R-squared: 0.2924
## F-statistic: 9.059 on 2 and 37 DF, p-value: 0.0006278
anova(model1, model3)## Analysis of Variance Table
##
## Model 1: grade ~ books
## Model 2: grade ~ books + attend
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 38 8250.4
## 2 37 7306.2 1 944.16 4.7814 0.03517 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model2, model3)## Analysis of Variance Table
##
## Model 1: grade ~ attend
## Model 2: grade ~ books + attend
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 38 8353.4
## 2 37 7306.2 1 1047.1 5.3028 0.02701 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
df$attend_cent <- df$attend - mean(df$attend)
#df$attend_cent <- scale(df$attend, center = T, scale = F)
model4 <- lm(grade ~ attend_cent, data = df)
summary(model2)##
## Call:
## lm(formula = grade ~ attend, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.777 -10.904 2.021 12.430 31.755
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.998 8.169 4.529 5.71e-05 ***
## attend 1.883 0.555 3.393 0.00163 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.83 on 38 degrees of freedom
## Multiple R-squared: 0.2325, Adjusted R-squared: 0.2123
## F-statistic: 11.51 on 1 and 38 DF, p-value: 0.001629
summary(model4)##
## Call:
## lm(formula = grade ~ attend_cent, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.777 -10.904 2.021 12.430 31.755
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 63.550 2.344 27.109 < 2e-16 ***
## attend_cent 1.883 0.555 3.393 0.00163 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.83 on 38 degrees of freedom
## Multiple R-squared: 0.2325, Adjusted R-squared: 0.2123
## F-statistic: 11.51 on 1 and 38 DF, p-value: 0.001629
The model quality is the same; but model4 has an interpretable intercept that fits the observed data.
model5 <- lm(grade ~ attend_cent + books, data = df)
summary(model5)##
## Call:
## lm(formula = grade ~ attend_cent + books, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.802 -13.374 0.060 9.173 32.295
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.476 4.151 13.365 9.81e-16 ***
## attend_cent 1.284 0.587 2.187 0.0352 *
## books 4.037 1.753 2.303 0.0270 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.05 on 37 degrees of freedom
## Multiple R-squared: 0.3287, Adjusted R-squared: 0.2924
## F-statistic: 9.059 on 2 and 37 DF, p-value: 0.0006278
anova(model4, model5)## Analysis of Variance Table
##
## Model 1: grade ~ attend_cent
## Model 2: grade ~ attend_cent + books
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 38 8353.4
## 2 37 7306.2 1 1047.1 5.3028 0.02701 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
These are the same models
library(QuantPsyc)
lm.beta(model5)## attend_cent books
## 0.3286422 0.3460987
library(sjPlot)
plot_model(model5, type = "std")Books matter slightly more
library(stargazer)
stargazer(model1, model4, model5, type = "text")##
## =======================================================================================
## Dependent variable:
## -------------------------------------------------------------------
## grade
## (1) (2) (3)
## ---------------------------------------------------------------------------------------
## books 5.738*** 4.037**
## (1.647) (1.753)
##
## attend_cent 1.883*** 1.283**
## (0.555) (0.587)
##
## Constant 52.075*** 63.550*** 55.476***
## (4.035) (2.344) (4.151)
##
## ---------------------------------------------------------------------------------------
## Observations 40 40 40
## R2 0.242 0.233 0.329
## Adjusted R2 0.222 0.212 0.292
## Residual Std. Error 14.735 (df = 38) 14.826 (df = 38) 14.052 (df = 37)
## F Statistic 12.130*** (df = 1; 38) 11.512*** (df = 1; 38) 9.059*** (df = 2; 37)
## =======================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
library(sjPlot)
sjt.lm(model5, show.ci = F, p.numeric = F)| grade | ||
| B | ||
| (Intercept) | 55.48 *** | |
| attend_cent | 1.28 * | |
| books | 4.04 * | |
| Observations | 40 | |
| R2 / adj. R2 | .329 / .292 | |
| Notes | * p<.05 ** p<.01 *** p<.001 | |
library(foreign)
data <- read.spss("ESS5e03.sav", to.data.frame = TRUE)
savevars <- c("eduyrs", "agea", "agertr", "gndr", "cntry", "icmnact", "hinctnta")
ESS1 <- data[savevars]
rm(data, savevars)
head(ESS1)## eduyrs agea agertr gndr cntry icmnact hinctnta
## 1 15 22 <NA> Male Belgium All others <NA>
## 2 15 43 <NA> Male Belgium In paid work F
## 3 13 19 <NA> Female Belgium All others J
## 4 15 23 <NA> Female Belgium In paid work H
## 5 15 58 60 Female Belgium All others S
## 6 13 62 60 Female Belgium Retired F
ESS2 <- na.omit(subset(ESS1, icmnact=="In paid work")) #na.omit - not the best idea, bit it will do for now
median(ESS2$income)## [1] 6
median(ESS2$eduyrs)## [1] 13
mean(ESS2$agea)## [1] 54.07378
ESS2$agea_c <- ESS2$agea - mean(ESS2$agea) #center age
m1 <- lm(agertr ~ agea_c, data = ESS2)
summary(m1)##
## Call:
## lm(formula = agertr ~ agea_c, data = ESS2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57.250 -2.506 0.195 2.505 58.728
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.55648 0.05998 1009.56 <2e-16 ***
## agea_c 0.24455 0.01035 23.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.169 on 7425 degrees of freedom
## Multiple R-squared: 0.06996, Adjusted R-squared: 0.06983
## F-statistic: 558.5 on 1 and 7425 DF, p-value: < 2.2e-16
m2 <- lm(agertr ~ agea_c + gndr, data = ESS2)
summary(m2)##
## Call:
## lm(formula = agertr ~ agea_c + gndr, data = ESS2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57.962 -2.723 -0.002 2.384 57.993
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.30832 0.08332 735.86 <2e-16 ***
## agea_c 0.23869 0.01025 23.30 <2e-16 ***
## gndrFemale -1.52691 0.11879 -12.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.113 on 7424 degrees of freedom
## Multiple R-squared: 0.09021, Adjusted R-squared: 0.08996
## F-statistic: 368 on 2 and 7424 DF, p-value: < 2.2e-16
anova(m1, m2)## Analysis of Variance Table
##
## Model 1: agertr ~ agea_c
## Model 2: agertr ~ agea_c + gndr
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 7425 198412
## 2 7424 194093 1 4319.4 165.21 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m3 <- lm(agertr ~ agea_c + gndr + income, data = ESS2)
summary(m3)##
## Call:
## lm(formula = agertr ~ agea_c + gndr + income, data = ESS2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58.321 -2.667 0.033 2.424 57.785
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.44109 0.17162 352.182 < 2e-16 ***
## agea_c 0.24357 0.01026 23.745 < 2e-16 ***
## gndrFemale -1.44434 0.11939 -12.097 < 2e-16 ***
## income 0.13259 0.02295 5.776 7.95e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.102 on 7423 degrees of freedom
## Multiple R-squared: 0.09428, Adjusted R-squared: 0.09391
## F-statistic: 257.6 on 3 and 7423 DF, p-value: < 2.2e-16
anova(m2, m3)## Analysis of Variance Table
##
## Model 1: agertr ~ agea_c + gndr
## Model 2: agertr ~ agea_c + gndr + income
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 7424 194093
## 2 7423 193224 1 868.5 33.365 7.949e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m4 <- lm(agertr ~ agea_c + gndr + income + eduyrs, data = ESS2)
summary(m4)##
## Call:
## lm(formula = agertr ~ agea_c + gndr + income + eduyrs, data = ESS2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58.964 -2.634 0.073 2.459 57.756
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59.09525 0.24139 244.817 < 2e-16 ***
## agea_c 0.24686 0.01022 24.145 < 2e-16 ***
## gndrFemale -1.52126 0.11930 -12.751 < 2e-16 ***
## income 0.07234 0.02410 3.002 0.0027 **
## eduyrs 0.13201 0.01672 7.896 3.31e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.081 on 7422 degrees of freedom
## Multiple R-squared: 0.1018, Adjusted R-squared: 0.1013
## F-statistic: 210.3 on 4 and 7422 DF, p-value: < 2.2e-16
anova(m3, m4)## Analysis of Variance Table
##
## Model 1: agertr ~ agea_c + gndr + income
## Model 2: agertr ~ agea_c + gndr + income + eduyrs
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 7423 193224
## 2 7422 191615 1 1609.4 62.339 3.307e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjt.lm(m3, m4, show.ci = F, p.numeric = F)| agertr | agertr | |||
| B | B | |||
| (Intercept) | 60.44 *** | 59.10 *** | ||
| agea_c | 0.24 *** | 0.25 *** | ||
| gndr (Female) | -1.44 *** | -1.52 *** | ||
| income | 0.13 *** | 0.07 ** | ||
| eduyrs | 0.13 *** | |||
| Observations | 7427 | 7427 | ||
| R2 / adj. R2 | .094 / .094 | .102 / .101 | ||
| Notes | * p<.05 ** p<.01 *** p<.001 | |||
m2i <- lm(agertr ~ agea_c * gndr, data = ESS2)
summary(m2i)##
## Call:
## lm(formula = agertr ~ agea_c * gndr, data = ESS2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58.07 -2.76 0.02 2.44 57.95
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.30421 0.08334 735.593 <2e-16 ***
## agea_c 0.25488 0.01396 18.257 <2e-16 ***
## gndrFemale -1.52774 0.11878 -12.862 <2e-16 ***
## agea_c:gndrFemale -0.03507 0.02055 -1.707 0.0879 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.112 on 7423 degrees of freedom
## Multiple R-squared: 0.09056, Adjusted R-squared: 0.0902
## F-statistic: 246.4 on 3 and 7423 DF, p-value: < 2.2e-16
anova(m2, m2i) #additive model is no worse than the interaction## Analysis of Variance Table
##
## Model 1: agertr ~ agea_c + gndr
## Model 2: agertr ~ agea_c * gndr
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 7424 194093
## 2 7423 194016 1 76.151 2.9135 0.08788 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m3i <- lm(agertr ~ agea_c + gndr * income, data = ESS2)
summary(m3i)##
## Call:
## lm(formula = agertr ~ agea_c + gndr * income, data = ESS2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58.287 -2.664 0.029 2.418 57.806
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.53488 0.22898 264.372 < 2e-16 ***
## agea_c 0.24375 0.01026 23.752 < 2e-16 ***
## gndrFemale -1.62083 0.30921 -5.242 1.63e-07 ***
## income 0.11822 0.03265 3.621 0.000296 ***
## gndrFemale:income 0.02833 0.04578 0.619 0.536087
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.102 on 7422 degrees of freedom
## Multiple R-squared: 0.09432, Adjusted R-squared: 0.09384
## F-statistic: 193.2 on 4 and 7422 DF, p-value: < 2.2e-16
anova(m3, m3i) #additive model is no worse than the interaction## Analysis of Variance Table
##
## Model 1: agertr ~ agea_c + gndr + income
## Model 2: agertr ~ agea_c + gndr * income
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 7423 193224
## 2 7422 193214 1 9.9673 0.3829 0.5361
m4i <- lm(agertr ~ agea_c + gndr + income * eduyrs, data = ESS2)
m4ii <- lm(agertr ~ agea_c + income + gndr * eduyrs, data = ESS2)
anova(m4, m4i, m4ii) # interaction model with income * eduyrs is better## Analysis of Variance Table
##
## Model 1: agertr ~ agea_c + gndr + income + eduyrs
## Model 2: agertr ~ agea_c + gndr + income * eduyrs
## Model 3: agertr ~ agea_c + income + gndr * eduyrs
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 7422 191615
## 2 7421 191464 1 150.45 5.8312 0.01577 *
## 3 7421 191576 0 -111.94
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m4i)##
## Call:
## lm(formula = agertr ~ agea_c + gndr + income * eduyrs, data = ESS2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -59.113 -2.646 0.080 2.437 57.794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.286944 0.549337 109.745 <2e-16 ***
## agea_c 0.245519 0.010236 23.986 <2e-16 ***
## gndrFemale -1.503284 0.119495 -12.580 <2e-16 ***
## income -0.118673 0.082688 -1.435 0.1513
## eduyrs 0.037553 0.042536 0.883 0.3773
## income:eduyrs 0.014503 0.006006 2.415 0.0158 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.079 on 7421 degrees of freedom
## Multiple R-squared: 0.1025, Adjusted R-squared: 0.1019
## F-statistic: 169.6 on 5 and 7421 DF, p-value: < 2.2e-16
sjPlot::plot_model(m4i, type = "pred", terms = "eduyrs")sjPlot::plot_model(m4i, type = "int", terms = "eduyrs", mdrt.values = "minmax")sjPlot::plot_model(m4i, type = "int", terms = "eduyrs", mdrt.values = "meansd")sjPlot::plot_model(m4i, type = "int", terms = "eduyrs", mdrt.values = "quart")sjPlot::plot_model(m4i, type = "diag")## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]