Question 0 A: Dataframe: capture

# Load the capture data set from a tab-separated text file with a header row.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
capture <- read.table("http://nathanieldphillips.com/wp-content/uploads/2015/12/capture.txt", 
                      sep = "\t", header = TRUE, stringsAsFactors = FALSE)

Question 0

#C
# Column names of the capture data frame.
names(x = capture)
##  [1] "size"          "cannons"       "style"         "warnshot"     
##  [5] "date"          "heardof"       "decorations"   "daysfromshore"
##  [9] "speed"         "treasure"
#D
# First six rows of the capture data frame.
head(x = capture, n = 6L)
##   size cannons   style warnshot date heardof decorations daysfromshore
## 1   48      54 classic        0  172       1           8            28
## 2   51      56  modern        0   15       0           3             6
## 3   50      44  modern        0   63       0           3            23
## 4   54      54  modern        0  362       1           2            23
## 5   50      56  modern        0  183       1           2            12
## 6   51      48  modern        0  279       0           1             3
##   speed treasure
## 1    16     2175
## 2    29     2465
## 3    18     1925
## 4    19     2200
## 5    21     2290
## 6    24     2195

Question 1

#A
# Simple regression of treasure on size, drawn over the raw scatter.
size.treasure.lm <- lm(formula = treasure ~ size, data = capture)

plot(
  x = capture[["size"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & size",
  xlab = "size",
  ylab = "treasure"
)

abline(reg = size.treasure.lm, col = "blue", lwd = 2, lty = 1)

#B
# Simple regression of treasure on cannons, drawn over the raw scatter.
cannons.treasure.lm <- lm(formula = treasure ~ cannons, data = capture)

plot(
  x = capture[["cannons"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & cannons",
  xlab = "cannons",
  ylab = "treasure"
)

abline(reg = cannons.treasure.lm, col = "coral", lwd = 2, lty = 1)

#C
# Simple regression of treasure on date, drawn over the raw scatter.
date.treasure.lm <- lm(formula = treasure ~ date, data = capture)

plot(
  x = capture[["date"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & date",
  xlab = "date",
  ylab = "treasure"
)

abline(reg = date.treasure.lm, col = "blue", lwd = 2, lty = 1)

#D
# Simple regression of treasure on decorations, drawn over the raw scatter.
decorations.treasure.lm <- lm(formula = treasure ~ decorations, data = capture)

plot(
  x = capture[["decorations"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & decorations",
  xlab = "decorations",
  ylab = "treasure"
)

abline(reg = decorations.treasure.lm, col = "blue", lwd = 2, lty = 1)

#E
# Simple regression of treasure on daysfromshore, drawn over the raw scatter.
shore.treasure.lm <- lm(formula = treasure ~ daysfromshore, data = capture)

plot(
  x = capture[["daysfromshore"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & days from shore",
  xlab = "days from shore",
  ylab = "treasure"
)

abline(reg = shore.treasure.lm, col = "blue", lwd = 2, lty = 1)

#F
# Simple regression of treasure on speed, drawn over the raw scatter.
speed.treasure.lm <- lm(formula = treasure ~ speed, data = capture)

plot(
  x = capture[["speed"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & speed",
  xlab = "speed",
  ylab = "treasure"
)

abline(reg = speed.treasure.lm, col = "blue", lwd = 2, lty = 1)

Question 2

#A
# Distribution of treasure for each ship style.
# The x-axis label is a literal string: names() applied to a single
# data-frame column (a plain vector) returns NULL, which leaves the axis
# unlabelled.
boxplot(treasure ~ style,
        data = capture,
        xlab = "style",
        ylab = "treasure",
        main = "Relationship between treasure and style")

#B
# Distribution of treasure by whether a warning shot was fired (0 / 1).
boxplot(treasure ~ warnshot,
        data = capture,
        xlab = "warnshot",
        ylab = "treasure",
        main = "Relationship between treasure and warnshot")

#C
# Distribution of treasure by the heardof indicator (0 / 1).
boxplot(treasure ~ heardof,
        data = capture,
        xlab = "heardof",
        ylab = "treasure",
        main = "Relationship between treasure and heardof")

Question 3

# Median treasure within each level of a grouping column.
# The formula is passed positionally rather than as `formula = ...`: the first
# argument of aggregate()'s formula method is called `formula` before R 4.2.0
# and `x` from R 4.2.0 on, so only an unnamed formula works in both.
aggregate(treasure ~ style,
          data = capture,
          FUN = median)
##     style treasure
## 1 classic     2000
## 2  modern     1895
aggregate(treasure ~ warnshot,
          data = capture,
          FUN = median)
##   warnshot treasure
## 1        0     1885
## 2        1     1945
aggregate(treasure ~ decorations,
          data = capture,
          FUN = median)
##    decorations treasure
## 1            1   2657.5
## 2            2   1780.0
## 3            3   1905.0
## 4            4   1797.5
## 5            5   1880.0
## 6            6   1855.0
## 7            7   1920.0
## 8            8   1935.0
## 9            9   1935.0
## 10          10   1955.0

Question 4

#A
# Pearson correlation test between the cannons and size columns of capture.
cor.test(formula = ~ cannons + size, data = capture)
## 
##  Pearson's product-moment correlation
## 
## data:  cannons and size
## t = 0.90501, df = 998, p-value = 0.3657
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03341657  0.09046832
## sample estimates:
##        cor 
## 0.02863584
# There is no significant relationship between the size of a ship and its number of cannons (r(998) = 0.03, p = 0.366).

#B
# The same cannons/size relationship, expressed as a simple linear regression.
cannon.size.lm <- lm(formula = cannons ~ size, data = capture)

summary(object = cannon.size.lm)
## 
## Call:
## lm(formula = cannons ~ size, data = capture)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.549 -14.324  -0.324  12.498  63.414 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  26.3039     7.2645   3.621 0.000308 ***
## size          0.1309     0.1446   0.905 0.365679    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.92 on 998 degrees of freedom
## Multiple R-squared:  0.00082,    Adjusted R-squared:  -0.0001812 
## F-statistic: 0.819 on 1 and 998 DF,  p-value: 0.3657
# There is no significant relationship between the size of a ship and its number of cannons (F(1, 998) = 0.819, p = 0.366). The p-value is the same as in the correlation test.

Question 5

#A
# Full model: treasure regressed on the nine other columns of capture.
treasure.model <- lm(
  formula = treasure ~ size + cannons + style + warnshot + date + heardof +
    decorations + daysfromshore + speed,
  data = capture
)

#B
# Coefficient table and fit statistics for the full model.
summary(object = treasure.model)
## 
## Call:
## lm(formula = treasure ~ size + cannons + style + warnshot + date + 
##     heardof + decorations + daysfromshore + speed, data = capture)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -880.96 -443.16 -211.02   66.08 2427.97 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    749.8957   351.0514   2.136 0.032913 *  
## size            22.5203     5.9602   3.778 0.000167 ***
## cannons         19.3817     1.2932  14.987  < 2e-16 ***
## stylemodern   -165.0932    84.6314  -1.951 0.051371 .  
## warnshot        89.0164    61.0610   1.458 0.145205    
## date             0.1508     0.2313   0.652 0.514511    
## heardof         92.1270    54.7238   1.683 0.092595 .  
## decorations    -96.3998    10.0249  -9.616  < 2e-16 ***
## daysfromshore   -8.6119     2.8180  -3.056 0.002303 ** 
## speed            9.2639     8.3892   1.104 0.269750    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 771.4 on 990 degrees of freedom
## Multiple R-squared:  0.2661, Adjusted R-squared:  0.2594 
## F-statistic: 39.88 on 9 and 990 DF,  p-value: < 2.2e-16
#C 
# There is a significant positive effect of "size" (t(990) = 3.78, p < 0.001).
# There is a significant positive effect of "cannons" (t(990) = 14.99, p < 0.001).
# There is a significant negative effect of "decorations" (t(990) = -9.62, p < 0.001).
# There is a significant negative effect of "daysfromshore" (t(990) = -3.06, p < 0.01).

#D 
# There is no significant effect of "style" (t(990) = -1.95, p = 0.051).
# There is no significant effect of "warnshot" (t(990) = 1.46, p = 0.145).
# There is no significant effect of "date" (t(990) = 0.65, p = 0.515).
# There is no significant effect of "heardof" (t(990) = 1.68, p = 0.093).
# There is no significant effect of "speed" (t(990) = 1.10, p = 0.270).

Question 6

#A
# There is a significant negative effect of "decorations" (t(990) = -9.62, p < 0.001) on the amount of treasure.

#B
# Simple regression of treasure on decorations, drawn over the raw scatter.
deco.treasure.lm1 <- lm(formula = treasure ~ decorations, data = capture)

plot(
  x = capture[["decorations"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & deco",
  xlab = "decorations",
  ylab = "treasure"
)

abline(reg = deco.treasure.lm1, col = "blue", lwd = 2, lty = 1)

# YES! the plot doesn't fit with the prior results (the regression line)

#C
# Same full model as before, refitted only on rows with treasure below 3500.
treasure.lt3500.model <- lm(
  formula = treasure ~ size + cannons + style + warnshot + date + heardof +
    decorations + daysfromshore + speed,
  data = capture,
  subset = treasure < 3500
)

#D
# Coefficient table and fit statistics for the restricted model.
summary(object = treasure.lt3500.model)
## 
## Call:
## lm(formula = treasure ~ size + cannons + style + warnshot + date + 
##     heardof + decorations + daysfromshore + speed, data = capture, 
##     subset = treasure < 3500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.703  -1.926   2.320   5.420   8.845 
## 
## Coefficients:
##                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)   -1.046e+01  3.844e+00   -2.722  0.00662 ** 
## size           2.000e+01  6.540e-02  305.746  < 2e-16 ***
## cannons        1.999e+01  1.405e-02 1422.085  < 2e-16 ***
## stylemodern    6.147e+00  9.457e-01    6.500 1.32e-10 ***
## warnshot       1.001e+02  6.702e-01  149.289  < 2e-16 ***
## date          -6.736e-04  2.561e-03   -0.263  0.79258    
## heardof        1.462e+01  6.046e-01   24.182  < 2e-16 ***
## decorations    3.183e+01  1.137e-01  279.890  < 2e-16 ***
## daysfromshore -1.000e+01  3.107e-02 -321.940  < 2e-16 ***
## speed          9.972e+00  9.173e-02  108.711  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.101 on 905 degrees of freedom
## Multiple R-squared:  0.9996, Adjusted R-squared:  0.9996 
## F-statistic: 2.587e+05 on 9 and 905 DF,  p-value: < 2.2e-16
#E
# Yes, my conclusion changed: there is now a significant positive effect of "decorations" (t(905) = 279.89, p < 0.001) on "treasure".

# There is still a significant positive effect of "size".
# There is still a significant positive effect of "cannons".
# There is now a significant positive effect of "style" (modern ships).
# There is now a significant positive effect of "warnshot".
# There is still no significant effect of "date".
# There is now a significant positive effect of "heardof".
# There is still a significant negative effect of "daysfromshore".
# There is now a significant positive effect of "speed".

Question 7

#A
# Reduced model: treasure predicted from size, cannons and speed only.
treasure.model2 <- lm(
  formula = treasure ~ size + cannons + speed,
  data = capture
)

summary(object = treasure.model2)
## 
## Call:
## lm(formula = treasure ~ size + cannons + speed, data = capture)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -564.6 -320.0 -230.8 -128.7 2834.7 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -25.334    349.199  -0.073    0.942    
## size          26.327      6.209   4.240 2.44e-05 ***
## cannons       19.346      1.355  14.275  < 2e-16 ***
## speed          8.724      8.791   0.992    0.321    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 810 on 996 degrees of freedom
## Multiple R-squared:  0.1858, Adjusted R-squared:  0.1833 
## F-statistic: 75.75 on 3 and 996 DF,  p-value: < 2.2e-16
#B
# One hypothetical ship: size 60, 80 cannons, speed 100.
new.capture <- data.frame(size = 60, cannons = 80, speed = 100)

predict(object = treasure.model2, newdata = new.capture)
##        1 
## 3974.313
#C
# Two extra cannons should add 2 * 19.35 (the cannons estimate in treasure.model2) = 38.69 more treasure.

#D
# The same hypothetical ship with two more cannons (82 instead of 80).
new.capture <- data.frame(size = 60, cannons = 82, speed = 100)

predict(object = treasure.model2, newdata = new.capture)
##        1 
## 4013.005

Question 8

#A
# Two numeric columns, nine rows each.
my.data <- data.frame(
  a = c(1, 5, 3, 6, 3, 5, 3, 8, 3),
  b = c(8, 3, 1, 4, 2, 6, 4, 8, 3)
)

#B
# c is an exact linear combination of a and b (no noise term).
my.data[["c"]] <- 3 * my.data[["a"]] - 5 * my.data[["b"]]

#C
# sign. positive effect of a on c (estimate 3)
# sign. negative effect of b on c (estimate -5)

#D
# Regress c on a and b; the data contain no noise, so the fit is exact.
abc.lm <- lm(formula = c ~ a + b, data = my.data)

summary(object = abc.lm)
## Warning in summary.lm(abc.lm): essentially perfect fit: summary may be
## unreliable
## 
## Call:
## lm(formula = c ~ a + b, data = my.data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.266e-15 -9.778e-16  5.658e-16  9.625e-16  2.773e-15 
## 
## Coefficients:
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)  9.474e-15  1.889e-15  5.016e+00  0.00241 ** 
## a            3.000e+00  3.702e-16  8.103e+15  < 2e-16 ***
## b           -5.000e+00  3.093e-16 -1.617e+16  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.114e-15 on 6 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.391e+32 on 2 and 6 DF,  p-value: < 2.2e-16
#surprisingly yes:)