#1 Download the dataframe pirate_survey_noerrors.txt from http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt. The data are stored in a tab-separated text file with headers. Load the dataframe into an object called pirates. Because it’s tab-separated, use sep = “\t”.

# Q1: load the tab-separated pirate survey (first row holds the column names)
# directly from the course URL into `pirates`.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
pirates <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
#2 The function pairs() can create a matrix of scatterplots of different ratio or interval variables in a dataset. Enter the following code to see a matrix of scatterplots for the pirate dataset

# Q2: scatterplot matrix of the five numeric pirate variables, selected by
# column name rather than through a one-sided formula.
pairs(
  pirates[c(
    "age", "tattoos", "tchests.found", "parrots.lifetime", "sword.speed"
  )]
)

#3 What variables reliably predict the number of treasure chests a pirate has found? Conduct a simple linear regression analysis with treasure chests found as the dependent variable and 3 independent variables: parrots.lifetime, age, and tattoos. Save the model as the object model.1. Then, use the summary() function to see the coefficients. What are your conclusions?

# Q3: regress treasure chests found on parrots owned, age and tattoos,
# then print the coefficient table.
model.1 <- lm(
  formula = tchests.found ~ parrots.lifetime + age + tattoos,
  data = pirates
)
summary(model.1)
## 
## Call:
## lm(formula = tchests.found ~ parrots.lifetime + age + tattoos, 
##     data = pirates)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.566 -5.225 -2.271  2.636 46.003 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.266327   1.407182   0.900 0.368389    
## parrots.lifetime -0.007083   0.088349  -0.080 0.936115    
## age               0.123838   0.044491   2.783 0.005480 ** 
## tattoos           0.274414   0.074297   3.693 0.000233 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.785 on 996 degrees of freedom
## Multiple R-squared:  0.02135,    Adjusted R-squared:  0.0184 
## F-statistic: 7.241 on 3 and 996 DF,  p-value: 8.283e-05
# Analysis-of-variance table for model.1: one row per predictor plus residuals.
anova(model.1)
## Analysis of Variance Table
## 
## Response: tchests.found
##                   Df Sum Sq Mean Sq F value    Pr(>F)    
## parrots.lifetime   1     41   40.66  0.6709 0.4129230    
## age                1    449  449.16  7.4114 0.0065944 ** 
## tattoos            1    827  826.74 13.6417 0.0002332 ***
## Residuals        996  60361   60.60                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# F(3, 996) = 7.241, p < .01, adjusted R2 = .018
# for parrots, the result was non-significant (t(996) = -0.080, p = 0.936)
# for age, the result was significant (t(996) = 2.783, p < .01)
# for tattoos, the result was significant (t(996) = 3.693, p < .01)
#4 Using the results from the previous question, create a scatterplot with the true values of the dependent variable (treasure chests found) on the x-axis and the model fits on the y-axis. Make the plot look nice with appropriate labels.

# Q4: true treasure chests found (x-axis) against the values fitted by
# model.1 (y-axis).
# An lm object has no components named after its predictors, so
# `model.1$tattoos`, `model.1$age` and `model.1$parrots.lifetime` are NULL and
# would not plot any model output; the fits are stored in
# `model.1$fitted.values`.
plot(
  x = pirates$tchests.found,
  y = model.1$fitted.values,
  main = "Model fits vs. true treasure chests found",
  xlab = "Treasure chests found (true values)",
  ylab = "Model fits (model.1)",
  pch = 16,
  col = "gray"
)

#5 Repeat your analysis from question 3, but only include pirates who are female and have owned less than 5 parrots in their lives. Do your conclusions change?

# Q5: same regression as model.1, restricted to female pirates who have owned
# fewer than 5 parrots.
model.2 <- lm(
  formula = tchests.found ~ parrots.lifetime + age + tattoos,
  data = pirates,
  subset = sex == "female" & parrots.lifetime < 5
)
summary(model.2)
## 
## Call:
## lm(formula = tchests.found ~ parrots.lifetime + age + tattoos, 
##     data = pirates, subset = sex == "female" & parrots.lifetime < 
##         5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.426  -4.722  -2.113   2.851  44.026 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -2.72969    2.61996  -1.042  0.29821   
## parrots.lifetime -0.18443    0.29642  -0.622  0.53423   
## age               0.25240    0.08075   3.126  0.00193 **
## tattoos           0.28510    0.11336   2.515  0.01237 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.258 on 338 degrees of freedom
## Multiple R-squared:  0.04689,    Adjusted R-squared:  0.03843 
## F-statistic: 5.542 on 3 and 338 DF,  p-value: 0.001006
# F(3, 338) = 5.542, p < .01, adjusted R2 = .038
# for parrots, the result was non-significant (t(338) = -0.622, p = 0.534)
# for age, the result was significant (t(338) = 3.126, p < .01)
# for tattoos, the result was significant (t(338) = 2.515, p < .05)
# No, the conclusions do not change: tattoos and age remain significant and parrots is still non-significant.
#6 Is there a relationship between whether or not a pirate wears a headband and his/her sword speed? Test this using linear regression. What is your conclusion?

# Q6: regress sword speed on headband use and print the coefficient table.
model.3 <- lm(
  formula = sword.speed ~ headband,
  data = pirates
)
summary(model.3)
## 
## Call:
## lm(formula = sword.speed ~ headband, data = pirates)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.658 -0.895 -0.576  0.063 43.483 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.6576     0.2553   6.494 1.32e-10 ***
## headbandyes  -0.5449     0.2686  -2.029   0.0428 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.514 on 998 degrees of freedom
## Multiple R-squared:  0.004107,   Adjusted R-squared:  0.003109 
## F-statistic: 4.115 on 1 and 998 DF,  p-value: 0.04276
# Analysis-of-variance table for model.3 (headband row plus residuals).
anova(model.3)
## Analysis of Variance Table
## 
## Response: sword.speed
##            Df Sum Sq Mean Sq F value  Pr(>F)  
## headband    1   26.0 26.0079  4.1152 0.04276 *
## Residuals 998 6307.3  6.3199                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# F(1, 998) = 4.115, p < .05, adjusted R2 = .003
# for headband, the result was significant (t(998) = -2.029, p < .05)
# if a pirate wears a headband there is a decrease in sword speed
#7 Now, repeat the analysis from question 6, but this time add sword.type as a second independent variable. What is your conclusion now?

# Q7: regress sword speed on headband use with sword type added as a second
# predictor, then print the coefficient table.
model.4 <- lm(
  formula = sword.speed ~ headband + sword.type,
  data = pirates
)
summary(model.4)
## 
## Call:
## lm(formula = sword.speed ~ headband + sword.type, data = pirates)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.304 -0.564 -0.261  0.249 36.805 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.8331     0.3433  11.164  < 2e-16 ***
## headbandyes          3.9581     0.3044  13.003  < 2e-16 ***
## sword.typecutlass   -7.0595     0.3967 -17.796  < 2e-16 ***
## sword.typesabre     -3.3909     0.4250  -7.978 4.06e-15 ***
## sword.typescimitar  -1.5348     0.4317  -3.556 0.000395 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.063 on 995 degrees of freedom
## Multiple R-squared:  0.3314, Adjusted R-squared:  0.3287 
## F-statistic: 123.3 on 4 and 995 DF,  p-value: < 2.2e-16
# Analysis-of-variance table for model.4: headband, then sword.type, then residuals.
anova (model.4)
## Analysis of Variance Table
## 
## Response: sword.speed
##             Df Sum Sq Mean Sq  F value Pr(>F)    
## headband     1   26.0   26.01   6.1116 0.0136 *  
## sword.type   3 2073.1  691.02 162.3837 <2e-16 ***
## Residuals  995 4234.2    4.26                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# F(4, 995) = 123.3, p < .01, adjusted R2 = .329
# for headband, the result was significant (t(995) = 13.003, p < .01)
# for sword.typecutlass the result was significant (t(995) = -17.796, p < .01)
# for sword.typesabre the result was significant (t(995) = -7.978, p < .01)
# for sword.typescimitar the result was significant (t(995) = -3.556, p < .01)
# with sword type in the model, wearing a headband is associated with an increase in sword speed
# relative to the baseline sword type (banana), every other sword type is associated with a decrease in sword speed
# there is a relationship between headband and sword type (see the cross-table below)

# Cross-tabulate headband use (rows) against sword type (columns).
table(
  pirates[["headband"]],
  pirates[["sword.type"]]
)
##      
##       banana cutlass sabre scimitar
##   no      27       8    32       30
##   yes     12     834    30       27
#8 Is there an interaction between sex and headband use when predicting a pirate’s sword speed? Test this only using pirates whose sex is male or female

# Q8: test the sex x headband interaction on sword speed using only pirates
# whose sex is "male" or "female", as the question requires. Without the
# subset the model also estimates terms for sex "other".
# NOTE: the pasted console output that follows was produced by the
# unsubsetted model; re-run this block to refresh it.
model.5 <- lm(
  formula = sword.speed ~ sex * headband,
  data = pirates,
  subset = sex %in% c("male", "female")
)
summary(model.5)
## 
## Call:
## lm(formula = sword.speed ~ sex * headband, data = pirates)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.721 -0.894 -0.582  0.065 43.452 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.72108    0.35613   4.833 1.56e-06 ***
## sexmale              -0.04571    0.53419  -0.086   0.9318    
## sexother             -0.61876    1.01623  -0.609   0.5427    
## headbandyes          -0.62761    0.37687  -1.665   0.0962 .  
## sexmale:headbandyes   0.09614    0.56089   0.171   0.8639    
## sexother:headbandyes  0.45839    1.11105   0.413   0.6800    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.518 on 994 degrees of freedom
## Multiple R-squared:  0.004748,   Adjusted R-squared:  -0.0002582 
## F-statistic: 0.9484 on 5 and 994 DF,  p-value: 0.4487
# there is no significant interaction between sex and headband
#9 Is there an effect of a pirate’s favorite pirate on the number of tattoos they have? Test this once using an ANOVA (the aov() function) and once using linear regression. How do the two p-values compare?

# Linear-regression version of the favorite-pirate test: regress number of
# tattoos on favorite pirate and print the coefficient table.
model.6 <- lm(
  formula = tattoos ~ favorite.pirate,
  data = pirates
)
summary(model.6)
## 
## Call:
## lm(formula = tattoos ~ favorite.pirate, data = pirates)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.713 -1.713  0.287  2.380  9.393 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  9.10000    0.30283  30.050   <2e-16 ***
## favorite.pirateBlackbeard    0.52000    0.44917   1.158    0.247    
## favorite.pirateEdward Low    0.24211    0.43387   0.558    0.577    
## favorite.pirateHook          0.61304    0.43290   1.416    0.157    
## favorite.pirateJack Sparrow  0.50706    0.34059   1.489    0.137    
## favorite.pirateLewis Scot   -0.01837    0.45167  -0.041    0.968    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.317 on 994 degrees of freedom
## Multiple R-squared:  0.004601,   Adjusted R-squared:  -0.000406 
## F-statistic: 0.9189 on 5 and 994 DF,  p-value: 0.4678
# Analysis-of-variance table for the regression fit model.6.
# NOTE(review): the question asks for an aov() fit as well; no aov() call is
# visible in this section -- confirm whether it appears elsewhere in the file.
anova(model.6)
## Analysis of Variance Table
## 
## Response: tattoos
##                  Df  Sum Sq Mean Sq F value Pr(>F)
## favorite.pirate   5    50.6  10.113  0.9189 0.4678
## Residuals       994 10939.0  11.005