Download the file https://goo.gl/oHWUQA
30 March 2016
Download the file https://goo.gl/oHWUQA
Use syntax
load('/User/stephanievandenberg/.../data_Browsing_AB')
Start this presentation at http://rpubs.com/smvandenberg/lm_slides
## Warning: package 'ape' was built under R version 3.2.3
## Warning: package 'ggplot2' was built under R version 3.2.3
## Warning: package 'GGally' was built under R version 3.2.3
## Subj education gender age Design ToT predict_M3 resid_M3 ## 1 23 degree m 28.41143 A 62.30250 71.05689 -8.75439061 ## 2 24 degree m 28.41143 B 71.14089 71.05689 0.08399684 ## 3 45 no degree f 38.93923 A 165.00313 127.98771 37.01541581 ## 4 46 no degree f 38.93923 B 176.26674 127.98771 48.27903273 ## 5 67 degree f 76.07241 A 105.24776 110.07658 -4.82881451 ## 6 68 degree f 76.07241 B 112.53900 110.07658 2.46242469 ## 7 89 no degree f 39.65661 A 150.06767 128.57150 21.49617168 ## 8 90 no degree f 39.65661 B 113.33963 128.57150 -15.23186566 ## 9 111 degree m 22.77343 A 159.78351 66.46883 93.31467989 ## 10 112 degree m 22.77343 B 71.04381 66.46883 4.57498281
# plot data
data_Browsing_AB %>% ggplot(aes(x = ToT)) +
geom_histogram(fill = 1, alpha = .5) + facet_grid(Design~.)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ANOVA_Design <- data_Browsing_AB %>% aov(ToT ~ Design, data = .) summary(ANOVA_Design)
## Df Sum Sq Mean Sq F value Pr(>F) ## Design 1 4558 4558 2.83 0.0941 . ## Residuals 198 318858 1610 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ANOVA_Design <- aov(ToT ~ Design, data = data_Browsing_AB) summary(ANOVA_Design)
## Df Sum Sq Mean Sq F value Pr(>F) ## Design 1 4558 4558 2.83 0.0941 . ## Residuals 198 318858 1610 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
data_Browsing_AB %>% ggplot(aes(x = education, y = ToT, fill = Design)) + geom_boxplot()
data_Browsing_AB %>% aggregate( ToT~education+Design, . , mean )
## education Design ToT ## 1 degree A 91.80865 ## 2 no degree A 141.78354 ## 3 degree B 83.54079 ## 4 no degree B 130.95649
ANOVA_education_design <- data_Browsing_AB %>% aov(ToT ~ education + Design, data = .) summary(ANOVA_education_design)
## Df Sum Sq Mean Sq F value Pr(>F) ## education 1 118562 118562 116.610 <2e-16 *** ## Design 1 4558 4558 4.483 0.0355 * ## Residuals 197 200297 1017 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ANOVA_education_design <- aov(ToT ~ education + Design, data = data_Browsing_AB ) summary(ANOVA_education_design)
## Df Sum Sq Mean Sq F value Pr(>F) ## education 1 118562 118562 116.610 <2e-16 *** ## Design 1 4558 4558 4.483 0.0355 * ## Residuals 197 200297 1017 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(ANOVA_education_design)
## (Intercept) educationno degree DesignB ## 92.448450 48.695291 -9.547452
data_Browsing_AB %>% aggregate( ToT~education+Design, . , mean )
## education Design ToT ## 1 degree A 91.80865 ## 2 no degree A 141.78354 ## 3 degree B 83.54079 ## 4 no degree B 130.95649
data_Browsing_AB %>% mutate( predicted = predict(ANOVA_education_design)) %>%
ggplot( aes(x=education, y=ToT, fill=Design, col=Design)) +
geom_point(aes(y=predicted, size=10))
ANOVA_fullfact <- data_Browsing_AB %>% aov(ToT ~ education + Design + education:Design, data = .)
summary(ANOVA_fullfact) ; coef(ANOVA_fullfact)
## Df Sum Sq Mean Sq F value Pr(>F) ## education 1 118562 118562 116.066 <2e-16 *** ## Design 1 4558 4558 4.462 0.0359 * ## education:Design 1 82 82 0.080 0.7774 ## Residuals 196 200215 1022 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Intercept) educationno degree ## 91.808655 49.974882 ## DesignB educationno degree:DesignB ## -8.267861 -2.559182
coef(ANOVA_fullfact)
## (Intercept) educationno degree ## 91.808655 49.974882 ## DesignB educationno degree:DesignB ## -8.267861 -2.559182
contrasts(data_Browsing_AB$education)
## no degree ## degree 0 ## no degree 1
contrasts(data_Browsing_AB$Design)
## B ## A 0 ## B 1
## education Design TOT ## 1 0 0 0.1430084 ## 2 0 1 0.7310363 ## 3 0 0 1.0435934 ## 4 1 1 1.1577183 ## 5 1 0 -0.7798004 ## 6 1 1 0.7102248
## education Design TOT interaction ## 1 0 0 -0.8464497 0 ## 2 0 1 0.6850992 0 ## 3 0 0 -0.4572138 0 ## 4 1 1 -1.4051543 1 ## 5 1 0 0.7860801 0 ## 6 1 1 0.1096235 1
data_Browsing_AB %>% ggplot(aes(x = age, y = ToT)) + geom_point() + geom_smooth()
LM_age <- data_Browsing_AB %>% lm(ToT ~ 1 + age, data = .) summary(LM_age)
## ## Call: ## lm(formula = ToT ~ 1 + age, data = .) ## ## Residuals: ## Min 1Q Median 3Q Max ## -88.737 -28.738 -2.309 27.947 89.035 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 70.8466 7.8210 9.058 < 2e-16 *** ## age 0.8397 0.1500 5.597 7.19e-08 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 37.55 on 198 degrees of freedom ## Multiple R-squared: 0.1366, Adjusted R-squared: 0.1323 ## F-statistic: 31.33 on 1 and 198 DF, p-value: 7.193e-08
confint(LM_age)
## 2.5 % 97.5 % ## (Intercept) 55.4234235 86.269787 ## age 0.5438301 1.135484
LM_fullfact <-
data_Browsing_AB %>% lm(ToT ~ 1 + education + Design + education:Design
, data=.)
summary(LM_fullfact)
## ## Call: ## lm(formula = ToT ~ 1 + education + Design + education:Design, ## data = .) ## ## Residuals: ## Min 1Q Median 3Q Max ## -80.389 -22.628 -0.038 23.251 80.299 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 91.809 4.520 20.312 < 2e-16 *** ## educationno degree 49.975 6.392 7.818 3.22e-13 *** ## DesignB -8.268 6.392 -1.293 0.197 ## educationno degree:DesignB -2.559 9.040 -0.283 0.777 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 31.96 on 196 degrees of freedom ## Multiple R-squared: 0.3809, Adjusted R-squared: 0.3715 ## F-statistic: 40.2 on 3 and 196 DF, p-value: < 2.2e-16
coef(LM_fullfact)
## (Intercept) educationno degree ## 91.808655 49.974882 ## DesignB educationno degree:DesignB ## -8.267861 -2.559182
coef(ANOVA_fullfact)
## (Intercept) educationno degree ## 91.808655 49.974882 ## DesignB educationno degree:DesignB ## -8.267861 -2.559182
M3 <- data_Browsing_AB %>% lm( ToT ~ 1 + age + education + gender, data = .) summary(M3)
## ## Call: ## lm(formula = ToT ~ 1 + age + education + gender, data = .) ## ## Residuals: ## Min 1Q Median 3Q Max ## -70.207 -20.091 0.067 17.468 93.315 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 48.1707 6.8017 7.082 2.48e-11 *** ## age 0.8138 0.1154 7.050 2.99e-11 *** ## educationno degree 48.1292 4.1036 11.729 < 2e-16 *** ## genderm -0.2343 4.1072 -0.057 0.955 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 28.87 on 196 degrees of freedom ## Multiple R-squared: 0.495, Adjusted R-squared: 0.4872 ## F-statistic: 64.03 on 3 and 196 DF, p-value: < 2.2e-16
M_ridiculous <-
data_Browsing_AB %>%
lm( ToT ~ 1 + ( age + education + gender + Design)^4,
data = .)
confint(M_ridiculous)
## 2.5 % 97.5 % ## (Intercept) 59.7068744 128.0933396 ## age -0.6244714 0.7294950 ## educationno degree -61.2611588 31.2215867 ## genderm -46.5576413 41.4054521 ## DesignB -116.9273195 -20.2142529 ## age:educationno degree 0.2592501 2.0299152 ## age:genderm -0.9694354 0.7382433 ## age:DesignB 0.2045858 2.1193833 ## educationno degree:genderm -17.8917376 110.3508903 ## educationno degree:DesignB -25.9140019 104.8763511 ## genderm:DesignB -40.8577359 83.5408637 ## age:educationno degree:genderm -1.8847413 0.6046219 ## age:educationno degree:DesignB -2.0303480 0.4737507 ## age:genderm:DesignB -1.5046898 0.9103325 ## educationno degree:genderm:DesignB -149.5987743 31.7636893 ## age:educationno degree:genderm:DesignB -0.6923949 2.8280962
M_lessridiculous <-
data_Browsing_AB %>%
lm( ToT ~ 1 + (age + education + gender + Design)^3,
data = .)
summary(M_lessridiculous)$coefficients[,c(1,4)]
## Estimate Pr(>|t|) ## (Intercept) 86.3734250 2.679385e-07 ## age 0.2104618 5.077195e-01 ## educationno degree -1.7121244 9.340330e-01 ## genderm 9.5722892 6.305695e-01 ## DesignB -53.5174222 1.187420e-02 ## age:educationno degree 0.8744501 2.549851e-02 ## age:genderm -0.3668520 3.341991e-01 ## age:DesignB 0.8460845 3.933877e-02 ## educationno degree:genderm 20.3151378 4.035751e-01 ## educationno degree:DesignB 12.8658513 6.017243e-01 ## genderm:DesignB -2.9552037 9.027354e-01 ## age:educationno degree:genderm -0.1061344 8.124233e-01 ## age:educationno degree:DesignB -0.2380336 5.946687e-01 ## age:genderm:DesignB 0.2053332 6.456632e-01 ## educationno degree:genderm:DesignB -7.0886655 6.463750e-01
M_reasonable <-
data_Browsing_AB %>%
lm( ToT ~ 1 + (age + education + gender + Design)^2,
data = .)
coef(M_reasonable)
## (Intercept) age ## 84.0830023 0.2393769 ## educationno degree genderm ## 8.7321025 8.5360261 ## DesignB age:educationno degree ## -51.9289037 0.7017360 ## age:genderm age:DesignB ## -0.3141304 0.8510494 ## educationno degree:genderm educationno degree:DesignB ## 11.6194996 -2.7319742 ## genderm:DesignB ## 3.9470652
data_Browsing_AB %>%
ggplot(aes(x = age, y = ToT,
shape = education, col= Design, size=10)) +
geom_point() + geom_smooth( method='lm', se=F)
data_Browsing_AB %>%
mutate( pred_Mfull2 = predict(M_reasonable )) %>%
ggplot(aes(x = age, y = pred_Mfull2,
shape = education, col= Design, size=10)) + geom_point()
y ~ axbxc =
y ~ 1 + a + b + c + a:b + a:c + a:c + a:b:c
Variables specified as a factor are analysed as categorical predictors, even when labelled with numbers. So be careful when you have more than two levels in a variable. Changing a numeric variable into a factor can be done by as.factor(variable_name). Always check the degrees of freedom in your output.