30 March 2016

The data file to be used

Load data into your workspace

Use syntax

load('/User/stephanievandenberg/.../data_Browsing_AB')

Slides with syntax

The experiment

## Warning: package 'ape' was built under R version 3.2.3
## Warning: package 'ggplot2' was built under R version 3.2.3
## Warning: package 'GGally' was built under R version 3.2.3
##    Subj education gender      age Design       ToT predict_M3     resid_M3
## 1    23    degree      m 28.41143      A  62.30250   71.05689  -8.75439061
## 2    24    degree      m 28.41143      B  71.14089   71.05689   0.08399684
## 3    45 no degree      f 38.93923      A 165.00313  127.98771  37.01541581
## 4    46 no degree      f 38.93923      B 176.26674  127.98771  48.27903273
## 5    67    degree      f 76.07241      A 105.24776  110.07658  -4.82881451
## 6    68    degree      f 76.07241      B 112.53900  110.07658   2.46242469
## 7    89 no degree      f 39.65661      A 150.06767  128.57150  21.49617168
## 8    90 no degree      f 39.65661      B 113.33963  128.57150 -15.23186566
## 9   111    degree      m 22.77343      A 159.78351   66.46883  93.31467989
## 10  112    degree      m 22.77343      B  71.04381   66.46883   4.57498281

Visualize data

# plot data
data_Browsing_AB %>%  ggplot(aes(x = ToT)) +
        geom_histogram(fill = 1, alpha = .5) + facet_grid(Design~.)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

One-way ANOVA

ANOVA_Design <-
  data_Browsing_AB %>% 
  aov(ToT ~ Design, data = .)

summary(ANOVA_Design) 
##              Df Sum Sq Mean Sq F value Pr(>F)  
## Design        1   4558    4558    2.83 0.0941 .
## Residuals   198 318858    1610                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

One-way ANOVA

ANOVA_Design <-  aov(ToT ~ Design, data = data_Browsing_AB)

summary(ANOVA_Design) 
##              Df Sum Sq Mean Sq F value Pr(>F)  
## Design        1   4558    4558    2.83 0.0941 .
## Residuals   198 318858    1610                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Two-factorial ANOVA (1)

data_Browsing_AB %>%
  ggplot(aes(x = education, y = ToT, fill = Design)) +
  geom_boxplot()

data_Browsing_AB %>% aggregate( ToT~education+Design, . ,  mean )
##   education Design       ToT
## 1    degree      A  91.80865
## 2 no degree      A 141.78354
## 3    degree      B  83.54079
## 4 no degree      B 130.95649

Two-factorial ANOVA (2)

ANOVA_education_design <- data_Browsing_AB %>% 
  aov(ToT ~ education + Design,  data = .)
summary(ANOVA_education_design) 
##              Df Sum Sq Mean Sq F value Pr(>F)    
## education     1 118562  118562 116.610 <2e-16 ***
## Design        1   4558    4558   4.483 0.0355 *  
## Residuals   197 200297    1017                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Two-factorial ANOVA (2)

ANOVA_education_design <- aov(ToT ~ education + Design,  data = data_Browsing_AB )
summary(ANOVA_education_design) 
##              Df Sum Sq Mean Sq F value Pr(>F)    
## education     1 118562  118562 116.610 <2e-16 ***
## Design        1   4558    4558   4.483 0.0355 *  
## Residuals   197 200297    1017                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ANOVA model coefficients

coef(ANOVA_education_design) 
##        (Intercept) educationno degree            DesignB 
##          92.448450          48.695291          -9.547452
data_Browsing_AB %>% aggregate( ToT~education+Design, . ,  mean )
##   education Design       ToT
## 1    degree      A  91.80865
## 2 no degree      A 141.78354
## 3    degree      B  83.54079
## 4 no degree      B 130.95649

ANOVA model coefficients

data_Browsing_AB %>% mutate( predicted = predict(ANOVA_education_design)) %>%
        ggplot( aes(x=education, y=ToT, fill=Design, col=Design)) +
        geom_point(aes(y=predicted, size=10)) 

Full factorial ANOVA (1)

ANOVA_fullfact <- 
  data_Browsing_AB %>% 
  aov(ToT ~  education + Design + education:Design,  data = .)

Full factorial ANOVA (2)

summary(ANOVA_fullfact) ; coef(ANOVA_fullfact)
##                   Df Sum Sq Mean Sq F value Pr(>F)    
## education          1 118562  118562 116.066 <2e-16 ***
## Design             1   4558    4558   4.462 0.0359 *  
## education:Design   1     82      82   0.080 0.7774    
## Residuals        196 200215    1022                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##                (Intercept)         educationno degree 
##                  91.808655                  49.974882 
##                    DesignB educationno degree:DesignB 
##                  -8.267861                  -2.559182

Full factorial ANOVA (2)

 coef(ANOVA_fullfact)
##                (Intercept)         educationno degree 
##                  91.808655                  49.974882 
##                    DesignB educationno degree:DesignB 
##                  -8.267861                  -2.559182
contrasts(data_Browsing_AB$education)
##           no degree
## degree            0
## no degree         1
contrasts(data_Browsing_AB$Design)
##   B
## A 0
## B 1

ANOVA = linear regression

##   education Design        TOT
## 1         0      0  0.1430084
## 2         0      1  0.7310363
## 3         0      0  1.0435934
## 4         1      1  1.1577183
## 5         1      0 -0.7798004
## 6         1      1  0.7102248

ANOVA = linear regression

##   education Design        TOT interaction
## 1         0      0 -0.8464497           0
## 2         0      1  0.6850992           0
## 3         0      0 -0.4572138           0
## 4         1      1 -1.4051543           1
## 5         1      0  0.7860801           0
## 6         1      1  0.1096235           1

Linear regression (1)

data_Browsing_AB %>%
  ggplot(aes(x = age, y = ToT)) +  geom_point() + geom_smooth()

Linear regression (2)

LM_age <- data_Browsing_AB %>% 
  lm(ToT ~ 1 + age, data = .) 
summary(LM_age)
## 
## Call:
## lm(formula = ToT ~ 1 + age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -88.737 -28.738  -2.309  27.947  89.035 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  70.8466     7.8210   9.058  < 2e-16 ***
## age           0.8397     0.1500   5.597 7.19e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37.55 on 198 degrees of freedom
## Multiple R-squared:  0.1366, Adjusted R-squared:  0.1323 
## F-statistic: 31.33 on 1 and 198 DF,  p-value: 7.193e-08

Linear regression (3)

confint(LM_age)
##                  2.5 %    97.5 %
## (Intercept) 55.4234235 86.269787
## age          0.5438301  1.135484

Full factorial regression analysis (1)

LM_fullfact <- 
 data_Browsing_AB %>% lm(ToT ~ 1 + education + Design + education:Design
                                , data=.)

Full factorial regression analysis (2)

summary(LM_fullfact)
## 
## Call:
## lm(formula = ToT ~ 1 + education + Design + education:Design, 
##     data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -80.389 -22.628  -0.038  23.251  80.299 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  91.809      4.520  20.312  < 2e-16 ***
## educationno degree           49.975      6.392   7.818 3.22e-13 ***
## DesignB                      -8.268      6.392  -1.293    0.197    
## educationno degree:DesignB   -2.559      9.040  -0.283    0.777    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.96 on 196 degrees of freedom
## Multiple R-squared:  0.3809, Adjusted R-squared:  0.3715 
## F-statistic:  40.2 on 3 and 196 DF,  p-value: < 2.2e-16

Same results from aov and lm

coef(LM_fullfact)
##                (Intercept)         educationno degree 
##                  91.808655                  49.974882 
##                    DesignB educationno degree:DesignB 
##                  -8.267861                  -2.559182
coef(ANOVA_fullfact)
##                (Intercept)         educationno degree 
##                  91.808655                  49.974882 
##                    DesignB educationno degree:DesignB 
##                  -8.267861                  -2.559182

Mixing quant. and qual. predictors

M3 <- data_Browsing_AB %>% lm( ToT ~ 1 + age + education + gender,  data = .)
summary(M3)
## 
## Call:
## lm(formula = ToT ~ 1 + age + education + gender, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -70.207 -20.091   0.067  17.468  93.315 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         48.1707     6.8017   7.082 2.48e-11 ***
## age                  0.8138     0.1154   7.050 2.99e-11 ***
## educationno degree  48.1292     4.1036  11.729  < 2e-16 ***
## genderm             -0.2343     4.1072  -0.057    0.955    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 28.87 on 196 degrees of freedom
## Multiple R-squared:  0.495,  Adjusted R-squared:  0.4872 
## F-statistic: 64.03 on 3 and 196 DF,  p-value: < 2.2e-16

Ridiculous model (1)

M_ridiculous <- 
data_Browsing_AB %>%
lm( ToT ~ 1 + ( age  + education + gender + Design)^4,         
    data = .)

Ridiculous model (2)

confint(M_ridiculous)
##                                               2.5 %      97.5 %
## (Intercept)                              59.7068744 128.0933396
## age                                      -0.6244714   0.7294950
## educationno degree                      -61.2611588  31.2215867
## genderm                                 -46.5576413  41.4054521
## DesignB                                -116.9273195 -20.2142529
## age:educationno degree                    0.2592501   2.0299152
## age:genderm                              -0.9694354   0.7382433
## age:DesignB                               0.2045858   2.1193833
## educationno degree:genderm              -17.8917376 110.3508903
## educationno degree:DesignB              -25.9140019 104.8763511
## genderm:DesignB                         -40.8577359  83.5408637
## age:educationno degree:genderm           -1.8847413   0.6046219
## age:educationno degree:DesignB           -2.0303480   0.4737507
## age:genderm:DesignB                      -1.5046898   0.9103325
## educationno degree:genderm:DesignB     -149.5987743  31.7636893
## age:educationno degree:genderm:DesignB   -0.6923949   2.8280962

Less ridiculous model (1)

M_lessridiculous <- 
data_Browsing_AB %>%
lm( ToT ~ 1 + (age + education + gender + Design)^3,         
    data = .)

Less ridiculous model (2)

summary(M_lessridiculous)$coefficients[,c(1,4)]
##                                       Estimate     Pr(>|t|)
## (Intercept)                         86.3734250 2.679385e-07
## age                                  0.2104618 5.077195e-01
## educationno degree                  -1.7121244 9.340330e-01
## genderm                              9.5722892 6.305695e-01
## DesignB                            -53.5174222 1.187420e-02
## age:educationno degree               0.8744501 2.549851e-02
## age:genderm                         -0.3668520 3.341991e-01
## age:DesignB                          0.8460845 3.933877e-02
## educationno degree:genderm          20.3151378 4.035751e-01
## educationno degree:DesignB          12.8658513 6.017243e-01
## genderm:DesignB                     -2.9552037 9.027354e-01
## age:educationno degree:genderm      -0.1061344 8.124233e-01
## age:educationno degree:DesignB      -0.2380336 5.946687e-01
## age:genderm:DesignB                  0.2053332 6.456632e-01
## educationno degree:genderm:DesignB  -7.0886655 6.463750e-01

Reasonable model (1)

M_reasonable <- 
data_Browsing_AB %>%
lm( ToT ~ 1 + (age + education + gender + Design)^2,         
    data = .)

Reasonable model (2)

coef(M_reasonable)
##                (Intercept)                        age 
##                 84.0830023                  0.2393769 
##         educationno degree                    genderm 
##                  8.7321025                  8.5360261 
##                    DesignB     age:educationno degree 
##                -51.9289037                  0.7017360 
##                age:genderm                age:DesignB 
##                 -0.3141304                  0.8510494 
## educationno degree:genderm educationno degree:DesignB 
##                 11.6194996                 -2.7319742 
##            genderm:DesignB 
##                  3.9470652

Observed ToTs (1)

data_Browsing_AB %>%
        ggplot(aes(x = age,  y = ToT, 
        shape = education, col= Design, size=10)) +
        geom_point() + geom_smooth( method='lm', se=F)

Observed ToTs (2)

Observed ToTs (3)

Predicted ToTs

data_Browsing_AB %>%
        mutate(   pred_Mfull2 = predict(M_reasonable )) %>%
        ggplot(aes(x = age, y = pred_Mfull2, 
    shape = education, col= Design, size=10)) + geom_point() 

Concluding remarks

  • y ~ axbxc =
    y ~ 1 + a + b + c + a:b + a:c + a:c + a:b:c

  • Variables specified as a factor are analysed as categorical predictors, even when labelled with numbers. So be careful when you have more than two levels in a variable. Changing a numeric variable into a factor can be done by as.factor(variable_name). Always check the degrees of freedom in your output.