Question 0 A: Dataframe: capture
# Load the tab-separated capture data set (one row per captured ship) from
# the course website. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
capture <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/12/capture.txt",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
Question 0
#C
# List the column names of the capture data frame.
colnames(capture)
## [1] "size" "cannons" "style" "warnshot"
## [5] "date" "heardof" "decorations" "daysfromshore"
## [9] "speed" "treasure"
#D
# Show the first six rows of the capture data frame.
head(capture, n = 6L)
## size cannons style warnshot date heardof decorations daysfromshore
## 1 48 54 classic 0 172 1 8 28
## 2 51 56 modern 0 15 0 3 6
## 3 50 44 modern 0 63 0 3 23
## 4 54 54 modern 0 362 1 2 23
## 5 50 56 modern 0 183 1 2 12
## 6 51 48 modern 0 279 0 1 3
## speed treasure
## 1 16 2175
## 2 29 2465
## 3 18 1925
## 4 19 2200
## 5 21 2290
## 6 24 2195
Question 1
#A
# Simple regression of treasure on ship size, drawn over the scatterplot.
size.treasure.lm <- lm(formula = treasure ~ size, data = capture)
plot(
  x = capture[["size"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & size",
  xlab = "size",
  ylab = "treasure"
)
abline(size.treasure.lm, col = "blue", lwd = 2, lty = 1)

#B
# Simple regression of treasure on number of cannons, drawn over the
# scatterplot.
cannons.treasure.lm <- lm(formula = treasure ~ cannons, data = capture)
plot(
  x = capture[["cannons"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & cannons",
  xlab = "cannons",
  ylab = "treasure"
)
abline(cannons.treasure.lm, col = "coral", lwd = 2, lty = 1)

#C
# Simple regression of treasure on capture date, drawn over the scatterplot.
date.treasure.lm <- lm(formula = treasure ~ date, data = capture)
plot(
  x = capture[["date"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & date",
  xlab = "date",
  ylab = "treasure"
)
abline(date.treasure.lm, col = "blue", lwd = 2, lty = 1)

#D
# Simple regression of treasure on number of decorations, drawn over the
# scatterplot.
decorations.treasure.lm <- lm(formula = treasure ~ decorations, data = capture)
plot(
  x = capture[["decorations"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & decorations",
  xlab = "decorations",
  ylab = "treasure"
)
abline(decorations.treasure.lm, col = "blue", lwd = 2, lty = 1)

#E
# Simple regression of treasure on days from shore, drawn over the
# scatterplot.
shore.treasure.lm <- lm(formula = treasure ~ daysfromshore, data = capture)
plot(
  x = capture[["daysfromshore"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & days from shore",
  xlab = "days from shore",
  ylab = "treasure"
)
abline(shore.treasure.lm, col = "blue", lwd = 2, lty = 1)

#F
# Simple regression of treasure on ship speed, drawn over the scatterplot.
speed.treasure.lm <- lm(formula = treasure ~ speed, data = capture)
plot(
  x = capture[["speed"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & speed",
  xlab = "speed",
  ylab = "treasure"
)
abline(speed.treasure.lm, col = "blue", lwd = 2, lty = 1)

Question 2
#A
# Distribution of treasure for each ship style. The x-axis label is written
# out explicitly: names() of an unnamed column vector is NULL, which would
# leave the axis unlabelled. The title typo ("Realtionship") is also fixed.
boxplot(
  treasure ~ style,
  data = capture,
  xlab = "style",
  ylab = "treasure",
  main = "Relationship between treasure and style"
)

#B
# Distribution of treasure by whether a warning shot was fired (0/1). The
# x-axis label is written out explicitly: names() of an unnamed column vector
# is NULL, which would leave the axis unlabelled.
boxplot(
  treasure ~ warnshot,
  data = capture,
  xlab = "warnshot",
  ylab = "treasure",
  main = "Relationship between treasure and warnshot"
)

#C
# Distribution of treasure by the heardof indicator (0/1). The x-axis label
# is written out explicitly: names() of an unnamed column vector is NULL,
# which would leave the axis unlabelled. The title typo ("Realtionship") is
# also fixed.
boxplot(
  treasure ~ heardof,
  data = capture,
  xlab = "heardof",
  ylab = "treasure",
  main = "Relationship between treasure and heardof"
)

Question 3
# Median treasure for each ship style. The formula is passed positionally
# because the name of the formula method's first argument differs between R
# versions (`formula` before 4.2.0, `x` from 4.2.0 on).
aggregate(treasure ~ style, data = capture, FUN = median)
## style treasure
## 1 classic 2000
## 2 modern 1895
# Median treasure by warning-shot indicator. The formula is passed
# positionally because the name of the formula method's first argument
# differs between R versions (`formula` before 4.2.0, `x` from 4.2.0 on).
aggregate(treasure ~ warnshot, data = capture, FUN = median)
## warnshot treasure
## 1 0 1885
## 2 1 1945
# Median treasure for each decoration count. The formula is passed
# positionally because the name of the formula method's first argument
# differs between R versions (`formula` before 4.2.0, `x` from 4.2.0 on).
aggregate(treasure ~ decorations, data = capture, FUN = median)
## decorations treasure
## 1 1 2657.5
## 2 2 1780.0
## 3 3 1905.0
## 4 4 1797.5
## 5 5 1880.0
## 6 6 1855.0
## 7 7 1920.0
## 8 8 1935.0
## 9 9 1935.0
## 10 10 1955.0
Question 4
#A
# Pearson correlation test between number of cannons and ship size.
cor.test(~ cannons + size, data = capture)
##
## Pearson's product-moment correlation
##
## data: cannons and size
## t = 0.90501, df = 998, p-value = 0.3657
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03341657 0.09046832
## sample estimates:
## cor
## 0.02863584
# There is no significant relationship between the size of a ship and its number of cannons (r(998) = 0.03, p = 0.366).
#B
# The same relationship expressed as a regression of cannons on size.
cannon.size.lm <- lm(formula = cannons ~ size, data = capture)
summary(object = cannon.size.lm)
##
## Call:
## lm(formula = cannons ~ size, data = capture)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.549 -14.324 -0.324 12.498 63.414
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.3039 7.2645 3.621 0.000308 ***
## size 0.1309 0.1446 0.905 0.365679
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.92 on 998 degrees of freedom
## Multiple R-squared: 0.00082, Adjusted R-squared: -0.0001812
## F-statistic: 0.819 on 1 and 998 DF, p-value: 0.3657
# There is no significant relationship between the size of a ship and its number of cannons (F(1, 998) = 0.819, p = 0.366). The p-value is the same as in the correlation test.
Question 5
#A
# Multiple regression of treasure on all nine predictors in the data set.
treasure.model <- lm(
  formula = treasure ~ size + cannons + style + warnshot + date + heardof +
    decorations + daysfromshore + speed,
  data = capture
)
#B
summary(object = treasure.model)
##
## Call:
## lm(formula = treasure ~ size + cannons + style + warnshot + date +
## heardof + decorations + daysfromshore + speed, data = capture)
##
## Residuals:
## Min 1Q Median 3Q Max
## -880.96 -443.16 -211.02 66.08 2427.97
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 749.8957 351.0514 2.136 0.032913 *
## size 22.5203 5.9602 3.778 0.000167 ***
## cannons 19.3817 1.2932 14.987 < 2e-16 ***
## stylemodern -165.0932 84.6314 -1.951 0.051371 .
## warnshot 89.0164 61.0610 1.458 0.145205
## date 0.1508 0.2313 0.652 0.514511
## heardof 92.1270 54.7238 1.683 0.092595 .
## decorations -96.3998 10.0249 -9.616 < 2e-16 ***
## daysfromshore -8.6119 2.8180 -3.056 0.002303 **
## speed 9.2639 8.3892 1.104 0.269750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 771.4 on 990 degrees of freedom
## Multiple R-squared: 0.2661, Adjusted R-squared: 0.2594
## F-statistic: 39.88 on 9 and 990 DF, p-value: < 2.2e-16
#C
# There is a significant positive effect of "size" (t(990) = 3.78, p < 0.001).
# There is a significant positive effect of "cannons" (t(990) = 14.99, p < 0.001).
# There is a significant negative effect of "decorations" (t(990) = -9.62, p < 0.001).
# There is a significant negative effect of "daysfromshore" (t(990) = -3.06, p < 0.01).
#D
# There is no significant effect of "style" (t(990) = -1.95, p = 0.051).
# There is no significant effect of "warnshot" (t(990) = 1.46, p = 0.15).
# There is no significant effect of "date" (t(990) = 0.65, p = 0.51).
# There is no significant effect of "heardof" (t(990) = 1.68, p = 0.093).
# There is no significant effect of "speed" (t(990) = 1.10, p = 0.27).
Question 6
#A
# There is a significant negative effect of "decorations" (t(990) = -9.62, p < 0.001) on the amount of treasure.
#B
# Simple regression of treasure on decorations, drawn over the scatterplot to
# compare against the multiple-regression result.
deco.treasure.lm1 <- lm(formula = treasure ~ decorations, data = capture)
plot(
  x = capture[["decorations"]],
  y = capture[["treasure"]],
  main = "Relationship between treasure & deco",
  xlab = "decorations",
  ylab = "treasure"
)
abline(deco.treasure.lm1, col = "blue", lwd = 2, lty = 1)

# Yes! The plot does not fit with the prior results (the regression line).
#C
# Refit the full model using only the ships with treasure below 3500.
treasure.lt3500.model <- lm(
  formula = treasure ~ size + cannons + style + warnshot + date + heardof +
    decorations + daysfromshore + speed,
  data = capture,
  subset = treasure < 3500
)
#D
summary(object = treasure.lt3500.model)
##
## Call:
## lm(formula = treasure ~ size + cannons + style + warnshot + date +
## heardof + decorations + daysfromshore + speed, data = capture,
## subset = treasure < 3500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.703 -1.926 2.320 5.420 8.845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.046e+01 3.844e+00 -2.722 0.00662 **
## size 2.000e+01 6.540e-02 305.746 < 2e-16 ***
## cannons 1.999e+01 1.405e-02 1422.085 < 2e-16 ***
## stylemodern 6.147e+00 9.457e-01 6.500 1.32e-10 ***
## warnshot 1.001e+02 6.702e-01 149.289 < 2e-16 ***
## date -6.736e-04 2.561e-03 -0.263 0.79258
## heardof 1.462e+01 6.046e-01 24.182 < 2e-16 ***
## decorations 3.183e+01 1.137e-01 279.890 < 2e-16 ***
## daysfromshore -1.000e+01 3.107e-02 -321.940 < 2e-16 ***
## speed 9.972e+00 9.173e-02 108.711 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.101 on 905 degrees of freedom
## Multiple R-squared: 0.9996, Adjusted R-squared: 0.9996
## F-statistic: 2.587e+05 on 9 and 905 DF, p-value: < 2.2e-16
#E
# Yes, my conclusion changed: there is now a significant positive effect of "decorations" (t(905) = 279.89, p < 0.001) on "treasure".
# There is still a significant positive effect of "size".
# There is still a significant positive effect of "cannons".
# There is now a significant positive effect of "style".
# There is now a significant positive effect of "warnshot".
# There is still no significant effect of "date".
# There is now a significant positive effect of "heardof".
# There is still a significant negative effect of "daysfromshore".
# There is now a significant positive effect of "speed".
Question 7
#A
# Reduced model that predicts treasure from size, cannons and speed only.
treasure.model2 <- lm(
  formula = treasure ~ size + cannons + speed,
  data = capture
)
summary(object = treasure.model2)
##
## Call:
## lm(formula = treasure ~ size + cannons + speed, data = capture)
##
## Residuals:
## Min 1Q Median 3Q Max
## -564.6 -320.0 -230.8 -128.7 2834.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -25.334 349.199 -0.073 0.942
## size 26.327 6.209 4.240 2.44e-05 ***
## cannons 19.346 1.355 14.275 < 2e-16 ***
## speed 8.724 8.791 0.992 0.321
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 810 on 996 degrees of freedom
## Multiple R-squared: 0.1858, Adjusted R-squared: 0.1833
## F-statistic: 75.75 on 3 and 996 DF, p-value: < 2.2e-16
#B
# Predicted treasure for a ship of size 60 with 80 cannons and speed 100.
new.capture <- data.frame(size = 60, cannons = 80, speed = 100)
predict(object = treasure.model2, newdata = new.capture)
## 1
## 3974.313
#C
# Estimate (cannons) = 19.35 * 2 = 38.69 -> two more cannons raise the predicted treasure by about 38.69
#D
# Same ship with two additional cannons (82 instead of 80).
new.capture <- data.frame(size = 60, cannons = 82, speed = 100)
predict(object = treasure.model2, newdata = new.capture)
## 1
## 4013.005
Question 8
#A
# Small made-up data set with two predictors, a and b.
my.data <- data.frame(
  a = c(1, 5, 3, 6, 3, 5, 3, 8, 3),
  b = c(8, 3, 1, 4, 2, 6, 4, 8, 3)
)
#B
# c is an exact linear combination of a and b, with no noise added.
my.data$c <- with(my.data, 3 * a - 5 * b)
#C
# Expected: a significant positive effect of a on c (estimate 3)
# Expected: a significant negative effect of b on c (estimate -5)
#D
# Regress c on a and b to check that the known coefficients are recovered.
abc.lm <- lm(formula = c ~ a + b, data = my.data)
summary(object = abc.lm)
## Warning in summary.lm(abc.lm): essentially perfect fit: summary may be
## unreliable
##
## Call:
## lm(formula = c ~ a + b, data = my.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.266e-15 -9.778e-16 5.658e-16 9.625e-16 2.773e-15
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.474e-15 1.889e-15 5.016e+00 0.00241 **
## a 3.000e+00 3.702e-16 8.103e+15 < 2e-16 ***
## b -5.000e+00 3.093e-16 -1.617e+16 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.114e-15 on 6 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.391e+32 on 2 and 6 DF, p-value: < 2.2e-16
#surprisingly yes:)