library(lme4) library(stargazer) load("dragons.RData") head(dragons)
15/3/2021
library(lme4) library(stargazer) load("dragons.RData") head(dragons)
## Loading required package: Matrix
## ## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
## testScore bodyLength mountainRange site ## 1 16.147309 165.5485 Bavarian a ## 2 33.886183 167.5593 Bavarian a ## 3 6.038333 165.8830 Bavarian a ## 4 18.838821 167.6855 Bavarian a ## 5 33.862328 169.9597 Bavarian a ## 6 47.043246 168.6887 Bavarian a
hist(dragons$testScore)
dragons$bodyLength2 <- scale(dragons$bodyLength) #I wouldn't normally do this basic.lm <- lm(testScore ~ bodyLength2, data = dragons) summary(basic.lm)
## ## Call: ## lm(formula = testScore ~ bodyLength2, data = dragons) ## ## Residuals: ## Min 1Q Median 3Q Max ## -56.962 -16.411 -0.783 15.193 55.200 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 50.3860 0.9676 52.072 <2e-16 *** ## bodyLength2 8.9956 0.9686 9.287 <2e-16 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 21.2 on 478 degrees of freedom ## Multiple R-squared: 0.1529, Adjusted R-squared: 0.1511 ## F-statistic: 86.25 on 1 and 478 DF, p-value: < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'
Plot the residuals - the red line should be close to being flat, like the dashed grey line
Have a quick look at the qqplot too - point should ideally fall onto the diagonal dashed linea bit off at the extremes, but that’s often the case; again doesn’t look too bad
However, what about observation independence? Are our data independent?
However, what about observation independence? Are our data independent?
## ## Call: ## lm(formula = testScore ~ bodyLength2 + mountainRange, data = dragons) ## ## Residuals: ## Min 1Q Median 3Q Max ## -52.263 -9.926 0.361 9.994 44.488 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 23.3818 2.5792 9.065 < 2e-16 *** ## bodyLength2 0.2055 1.2927 0.159 0.87379 ## mountainRangeCentral 36.5828 3.5993 10.164 < 2e-16 *** ## mountainRangeEmmental 16.2092 3.6966 4.385 1.43e-05 *** ## mountainRangeJulian 45.1147 4.1901 10.767 < 2e-16 *** ## mountainRangeLigurian 17.7478 3.6736 4.831 1.84e-06 *** ## mountainRangeMaritime 49.8813 3.1392 15.890 < 2e-16 *** ## mountainRangeSarntal 41.9784 3.1972 13.130 < 2e-16 *** ## mountainRangeSouthern 8.5196 2.7313 3.119 0.00192 ** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 14.96 on 471 degrees of freedom ## Multiple R-squared: 0.5843, Adjusted R-squared: 0.5773 ## F-statistic: 82.76 on 8 and 471 DF, p-value: < 2.2e-16
mixed.lmer <- lmer(testScore ~ bodyLength2 + (1|mountainRange), data = dragons)
summary(mixed.lmer)
## Linear mixed model fit by REML ['lmerMod'] ## Formula: testScore ~ bodyLength2 + (1 | mountainRange) ## Data: dragons ## ## REML criterion at convergence: 3985.6 ## ## Scaled residuals: ## Min 1Q Median 3Q Max ## -3.4815 -0.6513 0.0066 0.6685 2.9583 ## ## Random effects: ## Groups Name Variance Std.Dev. ## mountainRange (Intercept) 339.7 18.43 ## Residual 223.8 14.96 ## Number of obs: 480, groups: mountainRange, 8 ## ## Fixed effects: ## Estimate Std. Error t value ## (Intercept) 50.3860 6.5517 7.690 ## bodyLength2 0.5377 1.2750 0.422 ## ## Correlation of Fixed Effects: ## (Intr) ## bodyLength2 0.000
stargazer(mixed.lmer, type = "text", digits = 3, star.cutoffs = c(0.05, 0.01, 0.001), digit.separator = "")
## ## ================================================= ## Dependent variable: ## ----------------------------- ## testScore ## ------------------------------------------------- ## bodyLength2 0.538 ## (1.275) ## ## Constant 50.386*** ## (6.552) ## ## ------------------------------------------------- ## Observations 480 ## Log Likelihood -1992.816 ## Akaike Inf. Crit. 3993.631 ## Bayesian Inf. Crit. 4010.326 ## ================================================= ## Note: *p<0.05; **p<0.01; ***p<0.001
ggplot(dragons, aes(x = bodyLength, y = testScore, colour = mountainRange)) + facet_wrap(~mountainRange, nrow=3) + geom_point() + theme_classic() + geom_line(data = cbind(dragons, pred = predict(mixed.lmer)), aes(y = pred)) + theme(legend.position = "none")
reduced.lmer <- lmer(testScore ~1 + (1|mountainRange), data = dragons) summary(reduced.lmer)
## Linear mixed model fit by REML ['lmerMod'] ## Formula: testScore ~ 1 + (1 | mountainRange) ## Data: dragons ## ## REML criterion at convergence: 3988.1 ## ## Scaled residuals: ## Min 1Q Median 3Q Max ## -3.4872 -0.6589 0.0221 0.6684 2.9740 ## ## Random effects: ## Groups Name Variance Std.Dev. ## mountainRange (Intercept) 349.1 18.68 ## Residual 223.3 14.94 ## Number of obs: 480, groups: mountainRange, 8 ## ## Fixed effects: ## Estimate Std. Error t value ## (Intercept) 50.386 6.641 7.587
anova(reduced.lmer,mixed.lmer)
## refitting model(s) with ML (instead of REML)
## Data: dragons ## Models: ## reduced.lmer: testScore ~ 1 + (1 | mountainRange) ## mixed.lmer: testScore ~ bodyLength2 + (1 | mountainRange) ## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq) ## reduced.lmer 3 3999.7 4012.2 -1996.8 3993.7 ## mixed.lmer 4 4001.5 4018.2 -1996.7 3993.5 0.2075 1 0.6488