# Load the pirate survey (tab-separated text with a header row) from the
# course website. TRUE/FALSE are written out in full: T and F are ordinary
# variables that can be reassigned, whereas TRUE and FALSE are reserved words.
pirates <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Scatterplot matrix of five survey variables, to eyeball pairwise relations.
pairs(
  ~ age + tattoos + tchests.found + parrots.lifetime + sword.speed,
  data = pirates
)

# Regress treasure chests found on parrots owned, age, and tattoos,
# using the full sample.
model.1 <- lm(
  formula = tchests.found ~ parrots.lifetime + age + tattoos,
  data = pirates
)
summary(model.1)
##
## Call:
## lm(formula = tchests.found ~ parrots.lifetime + age + tattoos,
## data = pirates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.566 -5.225 -2.271 2.636 46.003
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.266327 1.407182 0.900 0.368389
## parrots.lifetime -0.007083 0.088349 -0.080 0.936115
## age 0.123838 0.044491 2.783 0.005480 **
## tattoos 0.274414 0.074297 3.693 0.000233 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.785 on 996 degrees of freedom
## Multiple R-squared: 0.02135, Adjusted R-squared: 0.0184
## F-statistic: 7.241 on 3 and 996 DF, p-value: 8.283e-05
# Overall model: F(3, 996) = 7.241, p < .01, adjusted R2 = .018.
# for parrots, the result was non-significant (t(996) = -0.08, p = .936)
# for age, the result was significant (t(996) = 2.783, p < .01)
# for tattoos, the result was significant (t(996) = 3.693, p < .01)
# Conclusion: The independent variables age and tattoos are significant predictors of the number of treasure chests a pirate has found, although the model explains only about 2% of the variance.
# Scatterplot of observed treasure-chest counts (x) against the values fitted
# by model.1 (y). Semi-transparent points keep overlapping observations visible.
plot(
  x = pirates$tchests.found,
  y = model.1$fitted.values,
  xlab = "True Amount of Treasure Chests Found",
  ylab = "Model Amount of Chests Found",
  main = "Treasure Chests Found\n True versus Model",
  pch = 16,
  col = gray(.05, .15)
)

# Add the diagonal (intercept 0, slope 1) where fitted == observed. abline()
# draws on the current plot, so it must be its own statement after plot()
# rather than an argument inside the plot() call.
abline(a = 0, b = 1)
# Keep only female pirates who have owned fewer than five parrots.
female.parrots <- subset(
  pirates,
  subset = sex == "female" & parrots.lifetime < 5
)

# Refit the treasure-chest regression on that subset. The fit is stored as
# model.2 (the script continues with model.3) so that the full-sample fit in
# model.1 is not overwritten.
model.2 <- lm(
  formula = tchests.found ~ parrots.lifetime + age + tattoos,
  data = female.parrots
)
summary(model.2)
##
## Call:
## lm(formula = tchests.found ~ parrots.lifetime + age + tattoos,
## data = female.parrots)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.426 -4.722 -2.113 2.851 44.026
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.72969 2.61996 -1.042 0.29821
## parrots.lifetime -0.18443 0.29642 -0.622 0.53423
## age 0.25240 0.08075 3.126 0.00193 **
## tattoos 0.28510 0.11336 2.515 0.01237 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.258 on 338 degrees of freedom
## Multiple R-squared: 0.04689, Adjusted R-squared: 0.03843
## F-statistic: 5.542 on 3 and 338 DF, p-value: 0.001006
# Overall model: F(3, 338) = 5.542, p < .01, adjusted R2 = .038.
# for parrots, the result was non-significant (t(338) = -0.622, p = .534)
# for age, the result was significant (t(338) = 3.126, p < .01)
# for tattoos, the result was significant (t(338) = 2.515, p = .012)
# Regress sword speed on headband use alone.
model.3 <- lm(
  formula = sword.speed ~ headband,
  data = pirates
)
summary(object = model.3)
##
## Call:
## lm(formula = sword.speed ~ headband, data = pirates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.658 -0.895 -0.576 0.063 43.483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6576 0.2553 6.494 1.32e-10 ***
## headbandyes -0.5449 0.2686 -2.029 0.0428 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.514 on 998 degrees of freedom
## Multiple R-squared: 0.004107, Adjusted R-squared: 0.003109
## F-statistic: 4.115 on 1 and 998 DF, p-value: 0.04276
# ANOVA table for the headband-only model.
anova(object = model.3)
## Analysis of Variance Table
##
## Response: sword.speed
## Df Sum Sq Mean Sq F value Pr(>F)
## headband 1 26.0 26.0079 4.1152 0.04276 *
## Residuals 998 6307.3 6.3199
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Whether or not a pirate wears a headband has a significant influence on sword.speed (t(998) = -2.029, p = .043). The headbandyes coefficient is negative (b = -0.54), so wearing a headband is associated with a lower sword.speed.
# Regress sword speed on headband use and sword type as additive predictors
# (no interaction term).
model.4 <- lm(
  formula = sword.speed ~ headband + sword.type,
  data = pirates
)
summary(object = model.4)
##
## Call:
## lm(formula = sword.speed ~ headband + sword.type, data = pirates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.304 -0.564 -0.261 0.249 36.805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.8331 0.3433 11.164 < 2e-16 ***
## headbandyes 3.9581 0.3044 13.003 < 2e-16 ***
## sword.typecutlass -7.0595 0.3967 -17.796 < 2e-16 ***
## sword.typesabre -3.3909 0.4250 -7.978 4.06e-15 ***
## sword.typescimitar -1.5348 0.4317 -3.556 0.000395 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.063 on 995 degrees of freedom
## Multiple R-squared: 0.3314, Adjusted R-squared: 0.3287
## F-statistic: 123.3 on 4 and 995 DF, p-value: < 2.2e-16
# ANOVA table for the additive headband + sword.type model.
anova(object = model.4)
## Analysis of Variance Table
##
## Response: sword.speed
## Df Sum Sq Mean Sq F value Pr(>F)
## headband 1 26.0 26.01 6.1116 0.0136 *
## sword.type 3 2073.1 691.02 162.3837 <2e-16 ***
## Residuals 995 4234.2 4.26
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
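# Sword type and headband both have a significant influence on the sword speed. Note that the headband coefficient changes sign once sword type is controlled for (-0.54 in model.3 versus 3.96 here), which suggests that headband use and sword type are related. This additive model contains no headband:sword.type term, so an interaction would have to be tested separately with headband * sword.type.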
# Regress sword speed on headband use, sex, and their interaction
# (headband * sex expands to both main effects plus headband:sex).
model.5 <- lm(
  formula = sword.speed ~ headband * sex,
  data = pirates
)
summary(object = model.5)
##
## Call:
## lm(formula = sword.speed ~ headband * sex, data = pirates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.721 -0.894 -0.582 0.065 43.452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.72108 0.35613 4.833 1.56e-06 ***
## headbandyes -0.62761 0.37687 -1.665 0.0962 .
## sexmale -0.04571 0.53419 -0.086 0.9318
## sexother -0.61876 1.01623 -0.609 0.5427
## headbandyes:sexmale 0.09614 0.56089 0.171 0.8639
## headbandyes:sexother 0.45839 1.11105 0.413 0.6800
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.518 on 994 degrees of freedom
## Multiple R-squared: 0.004748, Adjusted R-squared: -0.0002582
## F-statistic: 0.9484 on 5 and 994 DF, p-value: 0.4487
# ANOVA table for the headband-by-sex interaction model.
anova(object = model.5)
## Analysis of Variance Table
##
## Response: sword.speed
## Df Sum Sq Mean Sq F value Pr(>F)
## headband 1 26.0 26.0079 4.1014 0.04312 *
## sex 2 2.9 1.4719 0.2321 0.79290
## headband:sex 2 1.1 0.5597 0.0883 0.91553
## Residuals 994 6303.2 6.3413
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# When predicting a pirate's sword speed, the interaction between sex and headband does not have predictive value: the overall interaction test is non-significant (F(2, 994) = 0.088, p = .916), and the p-values for headbandyes:sexmale and headbandyes:sexother are both > .05.
# 9. Is there an effect of a pirate's favorite pirate on the number of tattoos they have? Test this once using an ANOVA (the aov() function) and once using linear regression. How do the two p-values compare?
# Version 1: ANOVA via aov(), with tattoos as the response and
# favorite.pirate as the grouping variable.
model.6 <- aov(
  formula = tattoos ~ favorite.pirate,
  data = pirates
)
summary(object = model.6)
## Df Sum Sq Mean Sq F value Pr(>F)
## favorite.pirate 5 51 10.11 0.919 0.468
## Residuals 994 10939 11.01
#No - there are no significant results meaning that the favorite pirate does not have an effect on the number of tattoos.
# Version 2: the same tattoos ~ favorite.pirate model fitted as a linear
# regression via lm().
model.7 <- lm(
  formula = tattoos ~ favorite.pirate,
  data = pirates
)
summary(object = model.7)
##
## Call:
## lm(formula = tattoos ~ favorite.pirate, data = pirates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.713 -1.713 0.287 2.380 9.393
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.10000 0.30283 30.050 <2e-16 ***
## favorite.pirateBlackbeard 0.52000 0.44917 1.158 0.247
## favorite.pirateEdward Low 0.24211 0.43387 0.558 0.577
## favorite.pirateHook 0.61304 0.43290 1.416 0.157
## favorite.pirateJack Sparrow 0.50706 0.34059 1.489 0.137
## favorite.pirateLewis Scot -0.01837 0.45167 -0.041 0.968
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.317 on 994 degrees of freedom
## Multiple R-squared: 0.004601, Adjusted R-squared: -0.000406
## F-statistic: 0.9189 on 5 and 994 DF, p-value: 0.4678
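# The favorite pirate does not have an effect on the number of tattoos (F(5, 994) = 0.919, p = .468). The overall p-value from lm() is identical to the one from aov() (.468), because both fit the same model.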
# Note that the `echo = FALSE` parameter was added to the code chunk
# to prevent printing of the R code that generated the plot.