Handout for week 2 practical

ANOVA script

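You’ll see the first thing I did was make sure there was nothing in R’s memory (the rm command). Then I loaded the libraries I would need at the top of the script; that’s just a stylistic preference. (One more package, car, is loaded further down, just before it is first used.)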

# Start with an empty workspace: remove every object that ls() reports
rm(list = ls())

# Packages needed from the outset
library(ggplot2)

# Load the obesity data from its CSV file into a data frame
obesity <- read.csv(
  file = "~/Dropbox/Teaching/second_year_stats/r_sessions/2.multiple_explanatory/obesity.csv"
)



# Exploratory scatterplot: FOREARM (y) against HT (x)
ggplot(data = obesity, mapping = aes(x = HT, y = FOREARM)) +
  geom_point() +
  theme_bw()

# Exploratory scatterplot: FOREARM (y) against WT (x)
ggplot(data = obesity, mapping = aes(x = WT, y = FOREARM)) +
  geom_point() +
  theme_bw()

# Simple regression of FOREARM on HT. The model is fitted once and the same
# fit is passed to both summary() and anova(), so the two tables are
# guaranteed to describe one and the same model.
fit_ht <- lm(FOREARM ~ HT, data = obesity)

# Coefficient table: estimates, standard errors and t-tests
summary(fit_ht)
## 
## Call:
## lm(formula = FOREARM ~ HT, data = obesity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5203 -1.5167 -0.3876  1.4310  4.9971 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.95880    9.82623   0.098    0.923
## HT           0.02605    0.06219   0.419    0.678
## 
## Residual standard error: 2.32 on 37 degrees of freedom
## Multiple R-squared:  0.004719,   Adjusted R-squared:  -0.02218 
## F-statistic: 0.1754 on 1 and 37 DF,  p-value: 0.6778
# ANOVA table for the same fit; with a single predictor its F-test has the
# same p-value (0.6778) as the t-test for HT in the coefficient table
anova(fit_ht)
## Analysis of Variance Table
## 
## Response: FOREARM
##           Df  Sum Sq Mean Sq F value Pr(>F)
## HT         1   0.944  0.9439  0.1754 0.6778
## Residuals 37 199.094  5.3809
# Simple regression of FOREARM on WT. The model is fitted once and the same
# fit is passed to both summary() and anova(), rather than refitting it for
# each table.
fit_wt <- lm(FOREARM ~ WT, data = obesity)

# Coefficient table: estimates, standard errors and t-tests
summary(fit_wt)
## 
## Call:
## lm(formula = FOREARM ~ WT, data = obesity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4386 -1.3166 -0.5204  0.6831  5.1331 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.59476    2.47273  -1.858 0.071115 .  
## WT           0.15298    0.03882   3.941 0.000347 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.951 on 37 degrees of freedom
## Multiple R-squared:  0.2956, Adjusted R-squared:  0.2766 
## F-statistic: 15.53 on 1 and 37 DF,  p-value: 0.000347
# ANOVA table for the same fit; with a single predictor its F-test has the
# same p-value (0.000347) as the t-test for WT in the coefficient table
anova(fit_wt)
## Analysis of Variance Table
## 
## Response: FOREARM
##           Df  Sum Sq Mean Sq F value   Pr(>F)    
## WT         1  59.137  59.137  15.529 0.000347 ***
## Residuals 37 140.901   3.808                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Multiple regression of FOREARM on WT and HT. The model is fitted once and
# reused for all three tables below instead of being refitted each time.
fit_wt_ht <- lm(FOREARM ~ WT + HT, data = obesity)

# anova() tests the terms sequentially, in formula order: WT is assessed
# first (its sum of squares, 59.137, is the same as in the WT-only model),
# then HT given that WT is already in the model.
anova(fit_wt_ht)
## Analysis of Variance Table
## 
## Response: FOREARM
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## WT         1  59.137  59.137 18.3334 0.0001314 ***
## HT         1  24.777  24.777  7.6813 0.0087755 ** 
## Residuals 36 116.124   3.226                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# car::Anova() reports Type II tests instead: each term is adjusted for the
# other, so the WT sum of squares changes (82.970) while the HT line does not.
library(car)
## Loading required package: carData
Anova(fit_wt_ht)
## Anova Table (Type II tests)
## 
## Response: FOREARM
##            Sum Sq Df F value    Pr(>F)    
## WT         82.970  1 25.7220 1.207e-05 ***
## HT         24.777  1  7.6813  0.008775 ** 
## Residuals 116.124 36                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient table for the same fit; the p-values of its t-tests match the
# Type II F-tests above.
summary(fit_wt_ht)
## 
## Call:
## lm(formula = FOREARM ~ WT + HT, data = obesity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3773 -1.2767 -0.1666  0.5143  4.9040 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.45175    8.27383   2.109  0.04194 *  
## WT           0.23317    0.04598   5.072 1.21e-05 ***
## HT          -0.17173    0.06196  -2.772  0.00878 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.796 on 36 degrees of freedom
## Multiple R-squared:  0.4195, Adjusted R-squared:  0.3872 
## F-statistic: 13.01 on 2 and 36 DF,  p-value: 5.605e-05
# Residuals of FOREARM after fitting WT alone, stored as a new column.
# residuals() is taken straight from the lm fit (wrapping the fit in aov()
# returns the same values). na.exclude makes residuals() return NA for any
# row dropped because of missing data, so the vector always has one entry
# per row of `obesity` and the column assignment cannot fail on a length
# mismatch.
obesity$residual_wt <- residuals(
  lm(FOREARM ~ WT, data = obesity, na.action = na.exclude)
)

# Plot the WT-adjusted residuals against HT
ggplot(obesity, aes(x = HT, y = residual_wt)) +
  geom_point() +
  theme_bw()