#1 Download the dataframe pirate_survey_noerrors.txt from http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt. The data are stored in a tab-separated text file with headers. Load the dataframe into an object called pirates. Because it's tab-separated, use sep = "\t".
# Read the tab-separated pirate survey (first row holds the column names)
# straight from the URL, keeping text columns as character vectors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, which would silently change how the file is parsed.
pirates <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
#2 The function pairs() can create a matrix of scatterplots of different ratio or interval variables in a dataset. Enter the following code to see a matrix of scatterplots for the pirate dataset
# Scatterplot matrix of the five survey variables named in the formula.
pairs(
  ~ age + tattoos + tchests.found + parrots.lifetime + sword.speed,
  data = pirates
)

#3 What variables reliably predict the number of treasure chests a pirate has found? Conduct a simple linear regression analysis with treasure chests found as the dependent variable and 3 independent variables: parrots.lifetime, age, and tattoos. Save the model as the object model.1. Then, use the summary() function to see the coefficients. What are your conclusions?
# Regress treasure chests found on lifetime parrots, age, and tattoos, then
# print the coefficient table and overall fit statistics.
model.1 <- lm(
  tchests.found ~ parrots.lifetime + age + tattoos,
  data = pirates
)
summary(model.1)
##
## Call:
## lm(formula = tchests.found ~ parrots.lifetime + age + tattoos,
## data = pirates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.566 -5.225 -2.271 2.636 46.003
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.266327 1.407182 0.900 0.368389
## parrots.lifetime -0.007083 0.088349 -0.080 0.936115
## age 0.123838 0.044491 2.783 0.005480 **
## tattoos 0.274414 0.074297 3.693 0.000233 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.785 on 996 degrees of freedom
## Multiple R-squared: 0.02135, Adjusted R-squared: 0.0184
## F-statistic: 7.241 on 3 and 996 DF, p-value: 8.283e-05
# Results (overall model): F(3, 996)= 7.241, p< .001, adjusted R2= .018
# Analysis-of-variance table for model.1: one row per predictor plus residuals.
anova(model.1)
## Analysis of Variance Table
##
## Response: tchests.found
## Df Sum Sq Mean Sq F value Pr(>F)
## parrots.lifetime 1 41 40.66 0.6709 0.4129230
## age 1 449 449.16 7.4114 0.0065944 **
## tattoos 1 827 826.74 13.6417 0.0002332 ***
## Residuals 996 60361 60.60
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#For individual Variables:
#parrots.lifetime -> the result is not significant t(996)= -.08, p=0.936
#age -> significant! t(996)= 2.783, p=.005
#tattoos -> significant! t(996)= 3.693, p< .001
#4 Using the results from the previous question, create a scatterplot with the true values of the dependent variable (treasure chests found) on the x-axis and the model fits on the y-axis. Make the plot look nice with appropriate labels.
# Question 4: observed treasure chests (x-axis) against the fitted values of
# model.1 (y-axis). The previous code used model.1$tattoos, model.1$age and
# model.1$parrots.lifetime as y-values; an lm object has no such components,
# so those expressions are NULL and the intended y-values were never drawn.
# The observed response is taken from the model frame so that it always has
# the same rows as the fitted values.
plot(
  x = model.response(model.frame(model.1)),
  y = fitted(model.1),
  main = "Model 1: observed vs. fitted treasure chests",
  xlab = "Treasure chests found (observed)",
  ylab = "Treasure chests found (model fit)",
  pch = 16,
  col = "pink"
)
# Dashed identity line: points on it would be predicted perfectly.
abline(a = 0, b = 1, lty = 2)

# Treasure chests found against each predictor, read from the data frame
# (the predictors are columns of pirates, not components of the lm object).
plot(
  x = pirates$tchests.found,
  y = pirates$tattoos,
  xlab = "Treasure chests found",
  ylab = "Tattoos",
  pch = 16,
  col = "pink"
)

plot(
  x = pirates$tchests.found,
  y = pirates$age,
  xlab = "Treasure chests found",
  ylab = "Age",
  pch = 16,
  col = "blue"
)

plot(
  x = pirates$tchests.found,
  y = pirates$parrots.lifetime,
  xlab = "Treasure chests found",
  ylab = "Parrots Lifetime",
  pch = 16,
  col = "orange"
)

# 5 Repeat your analysis from question 3, but only include pirates who are female and have owned less than 5 parrots in their lives. Do your conclusions change?
# Same regression as model.1, restricted to female pirates who have owned
# fewer than 5 parrots.
model.2 <- lm(
  tchests.found ~ parrots.lifetime + age + tattoos,
  data = pirates,
  subset = sex == "female" & parrots.lifetime < 5
)
summary(model.2)
##
## Call:
## lm(formula = tchests.found ~ parrots.lifetime + age + tattoos,
## data = pirates, subset = sex == "female" & parrots.lifetime <
## 5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.426 -4.722 -2.113 2.851 44.026
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.72969 2.61996 -1.042 0.29821
## parrots.lifetime -0.18443 0.29642 -0.622 0.53423
## age 0.25240 0.08075 3.126 0.00193 **
## tattoos 0.28510 0.11336 2.515 0.01237 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.258 on 338 degrees of freedom
## Multiple R-squared: 0.04689, Adjusted R-squared: 0.03843
## F-statistic: 5.542 on 3 and 338 DF, p-value: 0.001006
#Results:
#F(3, 338)= 5.542, p= .001, R2= .038
#For individual Variables:
#parrots.lifetime -> the result is not significant t(338)= -.622, p=0.534
#age -> significant! t(338)= 3.126, p< .01
#tattoos -> significant! t(338)= 2.515, p< .05
#No change in the conclusion -> age & tattoos is still significant
#6 Is there a relationship between whether or not a pirate wears a headband and his/her sword speed? Test this using linear regression. What is your conclusion?
# Regress sword speed on headband use and print the model summary.
model.3 <- lm(
  sword.speed ~ headband,
  data = pirates
)
summary(model.3)
##
## Call:
## lm(formula = sword.speed ~ headband, data = pirates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.658 -0.895 -0.576 0.063 43.483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6576 0.2553 6.494 1.32e-10 ***
## headbandyes -0.5449 0.2686 -2.029 0.0428 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.514 on 998 degrees of freedom
## Multiple R-squared: 0.004107, Adjusted R-squared: 0.003109
## F-statistic: 4.115 on 1 and 998 DF, p-value: 0.04276
# Analysis-of-variance table for model.3 (headband as the only predictor).
anova(model.3)
## Analysis of Variance Table
##
## Response: sword.speed
## Df Sum Sq Mean Sq F value Pr(>F)
## headband 1 26.0 26.0079 4.1152 0.04276 *
## Residuals 998 6307.3 6.3199
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#if a pirate wears a headband, there is a decrease in his/her sword speed!
# F(1, 998)= 4.115, p=.043
# t(998)= -2.029, p=.043
#7 Now, repeat the analysis from question 6, but this time add sword.type as a second independent variable. What is your conclusion now?
# Extend model.3 by adding sword type as a second predictor of sword speed.
model.4 <- lm(
  sword.speed ~ headband + sword.type,
  data = pirates
)
summary(model.4)
##
## Call:
## lm(formula = sword.speed ~ headband + sword.type, data = pirates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.304 -0.564 -0.261 0.249 36.805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.8331 0.3433 11.164 < 2e-16 ***
## headbandyes 3.9581 0.3044 13.003 < 2e-16 ***
## sword.typecutlass -7.0595 0.3967 -17.796 < 2e-16 ***
## sword.typesabre -3.3909 0.4250 -7.978 4.06e-15 ***
## sword.typescimitar -1.5348 0.4317 -3.556 0.000395 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.063 on 995 degrees of freedom
## Multiple R-squared: 0.3314, Adjusted R-squared: 0.3287
## F-statistic: 123.3 on 4 and 995 DF, p-value: < 2.2e-16
# Analysis-of-variance table for model.4 (headband, then sword.type).
anova(model.4)
## Analysis of Variance Table
##
## Response: sword.speed
## Df Sum Sq Mean Sq F value Pr(>F)
## headband 1 26.0 26.01 6.1116 0.0136 *
## sword.type 3 2073.1 691.02 162.3837 <2e-16 ***
## Residuals 995 4234.2 4.26
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# F(4, 995)=123.3, p < .001, R2= 0.329
# correlation between headband and sword.type!
# now, if a pirate wears a headband -> increase in swordspeed!
# t(995)=13.003, p< .001
# relative to the reference sword type (banana), every other sword.type decreases the sword speed
# sword.typecutlass t(995)= -17.796, p< .001
# sword.typesabre t(995)= -7.978, p< .001
# sword.typescimitar t(995)= -3.556, p< .001
# Cross-tabulate headband use (rows) against sword type (columns).
table(pirates[["headband"]], pirates[["sword.type"]])
##
## banana cutlass sabre scimitar
## no 27 8 32 30
## yes 12 834 30 27
#8 Is there an interaction between sex and headband use when predicting a pirate’s sword speed? Test this only using pirates whose sex is male or female
# Question 8 asks for the sex x headband interaction using only pirates whose
# sex is male or female, so every other sex category is dropped via `subset`
# before fitting. %in% is used instead of == so that missing values in sex
# yield FALSE rather than NA.
# NOTE: any console output pasted below this call that still lists
# `sexother` terms came from the fit without the subset and must be
# regenerated.
model.5 <- lm(
  sword.speed ~ sex * headband,
  data = pirates,
  subset = sex %in% c("male", "female")
)
summary(model.5)
##
## Call:
## lm(formula = sword.speed ~ sex * headband, data = pirates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.721 -0.894 -0.582 0.065 43.452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.72108 0.35613 4.833 1.56e-06 ***
## sexmale -0.04571 0.53419 -0.086 0.9318
## sexother -0.61876 1.01623 -0.609 0.5427
## headbandyes -0.62761 0.37687 -1.665 0.0962 .
## sexmale:headbandyes 0.09614 0.56089 0.171 0.8639
## sexother:headbandyes 0.45839 1.11105 0.413 0.6800
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.518 on 994 degrees of freedom
## Multiple R-squared: 0.004748, Adjusted R-squared: -0.0002582
## F-statistic: 0.9484 on 5 and 994 DF, p-value: 0.4487
# F(5,994)= 0.9484, p=.4487, adjusted R2= -.0003
# Analysis-of-variance table for model.5 (sex, headband, and their interaction).
anova(model.5)
## Analysis of Variance Table
##
## Response: sword.speed
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 2 1.9 0.9693 0.1529 0.85827
## headband 1 27.0 27.0131 4.2599 0.03928 *
## sex:headband 2 1.1 0.5597 0.0883 0.91553
## Residuals 994 6303.2 6.3413
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# there is no sig. interaction between sex and headband, but a sig. main effect of headband
# F(1,994)= 4.2599, p=.039