Download the file https://goo.gl/oHWUQA
Workshop package/Data/Linear models
5 January 2016
Download the file https://goo.gl/oHWUQA
Workshop package/Data/Linear models
Use syntax
load('/User/stephanievandenberg/Dropbox/BMS R workshop/Workshop package/Data/Linear models/data_Browsing_AB')
Start this presentation at http://rpubs.com/smvandenberg/lm_slides
## Warning: package 'ape' was built under R version 3.2.3
## Warning: package 'ggplot2' was built under R version 3.2.3
## Warning: package 'GGally' was built under R version 3.2.3
| Subj | education | gender | age | Design | ToT | predict_M3 | resid_M3 |
|---|---|---|---|---|---|---|---|
| 23 | degree | m | 28.41143 | A | 62.30250 | 71.05689 | -8.7543906 |
| 24 | degree | m | 28.41143 | B | 71.14089 | 71.05689 | 0.0839968 |
| 45 | no degree | f | 38.93923 | A | 165.00313 | 127.98771 | 37.0154158 |
| 46 | no degree | f | 38.93923 | B | 176.26674 | 127.98771 | 48.2790327 |
| 67 | degree | f | 76.07241 | A | 105.24776 | 110.07658 | -4.8288145 |
| 68 | degree | f | 76.07241 | B | 112.53900 | 110.07658 | 2.4624247 |
| 89 | no degree | f | 39.65661 | A | 150.06767 | 128.57150 | 21.4961717 |
| 90 | no degree | f | 39.65661 | B | 113.33963 | 128.57150 | -15.2318657 |
| 111 | degree | m | 22.77343 | A | 159.78351 | 66.46883 | 93.3146799 |
| 112 | degree | m | 22.77343 | B | 71.04381 | 66.46883 | 4.5749828 |
ANOVA_Design <- data_Browsing_AB %>% aov(ToT ~ Design, data = .) summary(ANOVA_Design)
## Df Sum Sq Mean Sq F value Pr(>F) ## Design 1 4558 4558 2.83 0.0941 . ## Residuals 198 318858 1610 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
data_Browsing_AB %>% ggplot(aes(x = education, y = ToT, fill = Design)) + geom_boxplot()
data_Browsing_AB %>% aggregate( ToT~education+Design, . , mean )
## education Design ToT ## 1 degree A 91.80865 ## 2 no degree A 141.78354 ## 3 degree B 83.54079 ## 4 no degree B 130.95649
ANOVA_education_design <- data_Browsing_AB %>% aov(ToT ~ education + Design, data = .) summary(ANOVA_education_design)
## Df Sum Sq Mean Sq F value Pr(>F) ## education 1 118562 118562 116.610 <2e-16 *** ## Design 1 4558 4558 4.483 0.0355 * ## Residuals 197 200297 1017 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(ANOVA_education_design)
## (Intercept) educationno degree DesignB ## 92.448450 48.695291 -9.547452
data_Browsing_AB %>% aggregate( ToT~education+Design, . , mean )
## education Design ToT ## 1 degree A 91.80865 ## 2 no degree A 141.78354 ## 3 degree B 83.54079 ## 4 no degree B 130.95649
ANOVA_fullfact <- data_Browsing_AB %>% aov(ToT ~ education + Design + education:Design, data = .)
summary(ANOVA_fullfact) ; coef(ANOVA_fullfact)
## Df Sum Sq Mean Sq F value Pr(>F) ## education 1 118562 118562 116.066 <2e-16 *** ## Design 1 4558 4558 4.462 0.0359 * ## education:Design 1 82 82 0.080 0.7774 ## Residuals 196 200215 1022 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Intercept) educationno degree ## 91.808655 49.974882 ## DesignB educationno degree:DesignB ## -8.267861 -2.559182
data_Browsing_AB %>% ggplot(aes(x = age, y = ToT)) + geom_point() + geom_smooth()
LM_age <- data_Browsing_AB %>% lm(ToT ~ 1 + age, data = .) summary(LM_age)
## ## Call: ## lm(formula = ToT ~ 1 + age, data = .) ## ## Residuals: ## Min 1Q Median 3Q Max ## -88.737 -28.738 -2.309 27.947 89.035 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 70.8466 7.8210 9.058 < 2e-16 *** ## age 0.8397 0.1500 5.597 7.19e-08 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 37.55 on 198 degrees of freedom ## Multiple R-squared: 0.1366, Adjusted R-squared: 0.1323 ## F-statistic: 31.33 on 1 and 198 DF, p-value: 7.193e-08
LM_fullfact <-
data_Browsing_AB %>% lm(ToT ~ 1 + education + Design + education:Design
, data=.)
summary(LM_fullfact)
## ## Call: ## lm(formula = ToT ~ 1 + education + Design + education:Design, ## data = .) ## ## Residuals: ## Min 1Q Median 3Q Max ## -80.389 -22.628 -0.038 23.251 80.299 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 91.809 4.520 20.312 < 2e-16 *** ## educationno degree 49.975 6.392 7.818 3.22e-13 *** ## DesignB -8.268 6.392 -1.293 0.197 ## educationno degree:DesignB -2.559 9.040 -0.283 0.777 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 31.96 on 196 degrees of freedom ## Multiple R-squared: 0.3809, Adjusted R-squared: 0.3715 ## F-statistic: 40.2 on 3 and 196 DF, p-value: < 2.2e-16
coef(LM_fullfact)
## (Intercept) educationno degree ## 91.808655 49.974882 ## DesignB educationno degree:DesignB ## -8.267861 -2.559182
coef(ANOVA_fullfact)
## (Intercept) educationno degree ## 91.808655 49.974882 ## DesignB educationno degree:DesignB ## -8.267861 -2.559182
M3 <- data_Browsing_AB %>% lm( ToT ~ 1 + age + education + gender, data = .) summary(M3)
## ## Call: ## lm(formula = ToT ~ 1 + age + education + gender, data = .) ## ## Residuals: ## Min 1Q Median 3Q Max ## -70.207 -20.091 0.067 17.468 93.315 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 48.1707 6.8017 7.082 2.48e-11 *** ## age 0.8138 0.1154 7.050 2.99e-11 *** ## educationno degree 48.1292 4.1036 11.729 < 2e-16 *** ## genderm -0.2343 4.1072 -0.057 0.955 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 28.87 on 196 degrees of freedom ## Multiple R-squared: 0.495, Adjusted R-squared: 0.4872 ## F-statistic: 64.03 on 3 and 196 DF, p-value: < 2.2e-16
M_ridiculous <-
data_Browsing_AB %>%
lm( ToT ~ 1 + ( age + education + gender + Design)^4,
data = .)
summary(M_ridiculous)$coefficients [,c(1,4)]
## Estimate Pr(>|t|) ## (Intercept) 93.9001070 1.870167e-07 ## age 0.0525118 8.785377e-01 ## educationno degree -15.0197860 5.224265e-01 ## genderm -2.5760946 9.081278e-01 ## DesignB -68.5707862 5.694932e-03 ## age:educationno degree 1.1445826 1.156574e-02 ## age:genderm -0.1155961 7.896880e-01 ## age:DesignB 1.1619846 1.764558e-02 ## educationno degree:genderm 46.2295763 1.565939e-01 ## educationno degree:DesignB 39.4811746 2.351377e-01 ## genderm:DesignB 21.3415639 4.992897e-01 ## age:educationno degree:genderm -0.6400597 3.116495e-01 ## age:educationno degree:DesignB -0.7782987 2.216082e-01 ## age:genderm:DesignB -0.2971787 6.278587e-01 ## educationno degree:genderm:DesignB -58.9175425 2.015040e-01 ## age:educationno degree:genderm:DesignB 1.0678506 2.328924e-01
M_lessridiculous <-
data_Browsing_AB %>%
lm( ToT ~ 1 + (age + education + gender + Design)^3,
data = .)
summary(M_lessridiculous)$coefficients[,c(1,4)]
## Estimate Pr(>|t|) ## (Intercept) 86.3734250 2.679385e-07 ## age 0.2104618 5.077195e-01 ## educationno degree -1.7121244 9.340330e-01 ## genderm 9.5722892 6.305695e-01 ## DesignB -53.5174222 1.187420e-02 ## age:educationno degree 0.8744501 2.549851e-02 ## age:genderm -0.3668520 3.341991e-01 ## age:DesignB 0.8460845 3.933877e-02 ## educationno degree:genderm 20.3151378 4.035751e-01 ## educationno degree:DesignB 12.8658513 6.017243e-01 ## genderm:DesignB -2.9552037 9.027354e-01 ## age:educationno degree:genderm -0.1061344 8.124233e-01 ## age:educationno degree:DesignB -0.2380336 5.946687e-01 ## age:genderm:DesignB 0.2053332 6.456632e-01 ## educationno degree:genderm:DesignB -7.0886655 6.463750e-01
M_reasonable <-
data_Browsing_AB %>%
lm( ToT ~ 1 + (age + education + gender + Design)^2,
data = .)
summary(M_reasonable)$coefficients[,c(1,4)]
## Estimate Pr(>|t|) ## (Intercept) 84.0830023 7.938233e-11 ## age 0.2393769 2.966175e-01 ## educationno degree 8.7321025 4.953211e-01 ## genderm 8.5360261 4.977448e-01 ## DesignB -51.9289037 5.691257e-05 ## age:educationno degree 0.7017360 1.784847e-03 ## age:genderm -0.3141304 1.569879e-01 ## age:DesignB 0.8510494 9.911922e-05 ## educationno degree:genderm 11.6194996 1.303931e-01 ## educationno degree:DesignB -2.7319742 7.198764e-01 ## genderm:DesignB 3.9470652 6.047568e-01
data_Browsing_AB %>%
ggplot(aes(x = age, y = ToT,
shape = education, col= Design, size=10)) +
geom_point() + geom_smooth( method='lm', se=F)
data_Browsing_AB %>%
mutate( pred_Mfull2 = predict(M_reasonable )) %>%
ggplot(aes(x = age, y = pred_Mfull2,
shape = education, col= Design, size=10)) + geom_point()
y ~ a:b:c =
y ~ 1 + a + b + c + a:b + a:c + a:c + a:b:c
Variables specified as a factor are analysed as categorical predictors, even when labelled with numbers. So be careful when you have more than two levels in a variable. Changing a numeric variable into a factor can be done by as.factor(variable_name). Always check the degrees of freedom in your output.