# WPA#7

#1 Download the dataframe pirate_survey_noerrors.txt from http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt. The data are stored in a tab-separated text file with headers. Load the dataframe into an object called pirates. Because it's tab-separated, use sep = "\t".

# Load the tab-separated pirate survey (with a header row) into `pirates`.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned; strings are kept as character vectors rather than factors.
pirates <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

#2 The function pairs() can create a matrix of scatterplots of different ratio or interval variables in a dataset. Enter the following code to see a matrix of scatterplots for the pirate dataset

# Scatterplot matrix of the five listed pirate variables.
pairs(
  ~ age + tattoos + tchests.found + parrots.lifetime + sword.speed,
  data = pirates
)

#3 What variables reliably predict the number of treasure chests a pirate has found? Conduct a simple linear regression analysis with treasure chests found as the dependent variable and 3 independent variables: parrots.lifetime, age, and tattoos. Save the model as the object model.1. Then, use the summary() function to see the coefficients. What are your conclusions?

# Regress treasure chests found on parrots owned, age and tattoos,
# then show the coefficient table.
model.1 <- lm(
  formula = tchests.found ~ parrots.lifetime + age + tattoos,
  data = pirates
)
summary(object = model.1)

## 
## Call:
## lm(formula = tchests.found ~ parrots.lifetime + age + tattoos, 
##     data = pirates)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.566 -5.225 -2.271  2.636 46.003 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.266327   1.407182   0.900 0.368389    
## parrots.lifetime -0.007083   0.088349  -0.080 0.936115    
## age               0.123838   0.044491   2.783 0.005480 ** 
## tattoos           0.274414   0.074297   3.693 0.000233 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.785 on 996 degrees of freedom
## Multiple R-squared:  0.02135,    Adjusted R-squared:  0.0184 
## F-statistic: 7.241 on 3 and 996 DF,  p-value: 8.283e-05

# Results (overall model): F(3, 996) = 7.241, p < .001, R2 = .021 (adjusted R2 = .018)

# ANOVA table for model.1; terms are tested sequentially, in formula order.
anova(object = model.1)

## Analysis of Variance Table
## 
## Response: tchests.found
##                   Df Sum Sq Mean Sq F value    Pr(>F)    
## parrots.lifetime   1     41   40.66  0.6709 0.4129230    
## age                1    449  449.16  7.4114 0.0065944 ** 
## tattoos            1    827  826.74 13.6417 0.0002332 ***
## Residuals        996  60361   60.60                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# For individual variables:
# parrots.lifetime -> not significant, t(996) = -0.080, p = .936
# age -> significant, t(996) = 2.783, p = .005
# tattoos -> significant, t(996) = 3.693, p < .001

#4 Using the results from the previous question, create a scatterplot with the true values of the dependent variable (treasure chests found) on the x-axis and the model fits on the y-axis. Make the plot look nice with appropriate labels.

# Question 4: observed treasure chests (x-axis) against the fitted values of
# model.1 (y-axis).
# An lm object has no components named after its predictors, so expressions
# such as model.1$tattoos are NULL and cannot be plotted; the model fits are
# obtained with fitted(). The observed response is taken from the model frame
# so that x and y always cover exactly the rows used in the fit.
plot(x = model.frame(model.1)$tchests.found,
     y = fitted(model.1),
     xlab = "Treasure chests found (observed)",
     ylab = "Treasure chests found (model.1 fit)",
     main = "Observed vs. fitted treasure chests",
     pch = 16,
     col = "blue")

# 5 Repeat your analysis from question 3, but only include pirates who are female and have owned less than 5 parrots in their lives. Do your conclusions change?
 
# Same regression as model.1, fitted only on female pirates who have owned
# fewer than 5 parrots.
model.2 <- lm(
  formula = tchests.found ~ parrots.lifetime + age + tattoos,
  data = pirates,
  subset = sex == "female" & parrots.lifetime < 5
)
summary(object = model.2)

## 
## Call:
## lm(formula = tchests.found ~ parrots.lifetime + age + tattoos, 
##     data = pirates, subset = sex == "female" & parrots.lifetime < 
##         5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.426  -4.722  -2.113   2.851  44.026 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -2.72969    2.61996  -1.042  0.29821   
## parrots.lifetime -0.18443    0.29642  -0.622  0.53423   
## age               0.25240    0.08075   3.126  0.00193 **
## tattoos           0.28510    0.11336   2.515  0.01237 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.258 on 338 degrees of freedom
## Multiple R-squared:  0.04689,    Adjusted R-squared:  0.03843 
## F-statistic: 5.542 on 3 and 338 DF,  p-value: 0.001006

# Results:
# F(3, 338) = 5.542, p = .001, R2 = .047 (adjusted R2 = .038)

# For individual variables:
# parrots.lifetime -> not significant, t(338) = -0.622, p = .534
# age -> significant, t(338) = 3.126, p = .002
# tattoos -> significant, t(338) = 2.515, p = .012
# No change in the conclusion -> age and tattoos are still significant

#6 Is there a relationship between whether or not a pirate wears a headband and his/her sword speed? Test this using linear regression. What is your conclusion?

# Regress sword speed on headband use and show the coefficient table.
model.3 <- lm(
  formula = sword.speed ~ headband,
  data = pirates
)
summary(object = model.3)

## 
## Call:
## lm(formula = sword.speed ~ headband, data = pirates)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.658 -0.895 -0.576  0.063 43.483 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.6576     0.2553   6.494 1.32e-10 ***
## headbandyes  -0.5449     0.2686  -2.029   0.0428 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.514 on 998 degrees of freedom
## Multiple R-squared:  0.004107,   Adjusted R-squared:  0.003109 
## F-statistic: 4.115 on 1 and 998 DF,  p-value: 0.04276

# ANOVA table for the headband-only model.
anova(object = model.3)

## Analysis of Variance Table
## 
## Response: sword.speed
##            Df Sum Sq Mean Sq F value  Pr(>F)  
## headband    1   26.0 26.0079  4.1152 0.04276 *
## Residuals 998 6307.3  6.3199                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# If a pirate wears a headband, his/her sword speed is lower on average (b = -0.545).
# F(1, 998) = 4.115, p = .043
# t(998) = -2.029, p = .043

#7 Now, repeat the analysis from question 6, but this time add sword.type as a second independent variable. What is your conclusion now?

# Regress sword speed on headband use and sword type, then show the
# coefficient table.
model.4 <- lm(
  formula = sword.speed ~ headband + sword.type,
  data = pirates
)
summary(object = model.4)

## 
## Call:
## lm(formula = sword.speed ~ headband + sword.type, data = pirates)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.304 -0.564 -0.261  0.249 36.805 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.8331     0.3433  11.164  < 2e-16 ***
## headbandyes          3.9581     0.3044  13.003  < 2e-16 ***
## sword.typecutlass   -7.0595     0.3967 -17.796  < 2e-16 ***
## sword.typesabre     -3.3909     0.4250  -7.978 4.06e-15 ***
## sword.typescimitar  -1.5348     0.4317  -3.556 0.000395 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.063 on 995 degrees of freedom
## Multiple R-squared:  0.3314, Adjusted R-squared:  0.3287 
## F-statistic: 123.3 on 4 and 995 DF,  p-value: < 2.2e-16

# ANOVA table for model.4; headband is entered first, then sword.type.
anova(object = model.4)

## Analysis of Variance Table
## 
## Response: sword.speed
##             Df Sum Sq Mean Sq  F value Pr(>F)    
## headband     1   26.0   26.01   6.1116 0.0136 *  
## sword.type   3 2073.1  691.02 162.3837 <2e-16 ***
## Residuals  995 4234.2    4.26                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

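# F(4, 995) = 123.3, p < .001, R2 = .331 (adjusted R2 = .329)

# headband and sword.type are strongly associated (see the table below), so
# controlling for sword.type reverses the sign of the headband effect:
# now, if a pirate wears a headband -> increase in sword speed!
# t(995) = 13.003, p < .001

# relative to the reference sword type (banana), every other sword type has a lower sword speed
# sword.typecutlass t(995) = -17.796, p < .001
# sword.typesabre t(995) = -7.978, p < .001
# sword.typescimitar t(995) = -3.556, p < .001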

# Cross-tabulate headband use (rows) against sword type (columns).
table(
  pirates[["headband"]],
  pirates[["sword.type"]]
)

##      
##       banana cutlass sabre scimitar
##   no      27       8    32       30
##   yes     12     834    30       27

#8 Is there an interaction between sex and headband use when predicting a pirate’s sword speed? Test this only using pirates whose sex is male or female

# Question 8: test the sex x headband interaction on sword speed, restricted
# to pirates whose sex is "male" or "female", as the question requires.
# NOTE: the output recorded below this block was produced without this
# restriction (it still contains the "sexother" terms); re-run the script to
# refresh it.
model.5 <- lm(
  sword.speed ~ sex * headband,
  data = pirates,
  subset = sex %in% c("male", "female")
)
summary(model.5)

## 
## Call:
## lm(formula = sword.speed ~ sex * headband, data = pirates)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.721 -0.894 -0.582  0.065 43.452 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.72108    0.35613   4.833 1.56e-06 ***
## sexmale              -0.04571    0.53419  -0.086   0.9318    
## sexother             -0.61876    1.01623  -0.609   0.5427    
## headbandyes          -0.62761    0.37687  -1.665   0.0962 .  
## sexmale:headbandyes   0.09614    0.56089   0.171   0.8639    
## sexother:headbandyes  0.45839    1.11105   0.413   0.6800    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.518 on 994 degrees of freedom
## Multiple R-squared:  0.004748,   Adjusted R-squared:  -0.0002582 
## F-statistic: 0.9484 on 5 and 994 DF,  p-value: 0.4487

# F(5, 994) = 0.9484, p = .4487, R2 = .005 (adjusted R2 = -.0003)

# ANOVA table for model.5: sex, headband, then their interaction.
anova(object = model.5)

## Analysis of Variance Table
## 
## Response: sword.speed
##               Df Sum Sq Mean Sq F value  Pr(>F)  
## sex            2    1.9  0.9693  0.1529 0.85827  
## headband       1   27.0 27.0131  4.2599 0.03928 *
## sex:headband   2    1.1  0.5597  0.0883 0.91553  
## Residuals    994 6303.2  6.3413                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# there is no sig. interaction between sex and headband, but a sig. main effect of headband
# F(1,994)= 4.2599, p=.039

# WPA#7

# Lea Riegler

# Juni 2015