download.file("http://www.openintro.org/stat/data/mlb11.RData", destfile = "mlb11.RData")
load("mlb11.RData")
head(mlb11)
## team runs at_bats hits homeruns bat_avg strikeouts
## 1 Texas Rangers 855 5659 1599 210 0.283 930
## 2 Boston Red Sox 875 5710 1600 203 0.280 1108
## 3 Detroit Tigers 787 5563 1540 169 0.277 1143
## 4 Kansas City Royals 730 5672 1560 129 0.275 1006
## 5 St. Louis Cardinals 762 5532 1513 162 0.273 978
## 6 New York Mets 718 5600 1477 108 0.264 1085
## stolen_bases wins new_onbase new_slug new_obs
## 1 143 96 0.340 0.460 0.800
## 2 102 90 0.349 0.461 0.810
## 3 49 95 0.340 0.434 0.773
## 4 153 71 0.329 0.415 0.744
## 5 57 90 0.341 0.425 0.766
## 6 130 77 0.335 0.391 0.725
plot(mlb11$at_bats, mlb11$runs)
cor(mlb11$runs, mlb11$at_bats)
## [1] 0.610627
plot_ss(x=mlb11$at_bats,y=mlb11$runs)
## Click two points to make a line.
## Call:
## lm(formula = y ~ x, data = pts)
##
## Coefficients:
## (Intercept) x
## -2789.2429 0.6305
##
## Sum of Squares: 123721.9
plot_ss(x=mlb11$at_bats,y=mlb11$runs,showSquares = TRUE)
## Click two points to make a line.
## Call:
## lm(formula = y ~ x, data = pts)
##
## Coefficients:
## (Intercept) x
## -2789.2429 0.6305
##
## Sum of Squares: 123721.9
I was only able to get a sum of squares of 123721.9, it never prompted me to select points.
m1 <- lm(runs ~ at_bats, data = mlb11)
summary(m1)
##
## Call:
## lm(formula = runs ~ at_bats, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -125.58 -47.05 -16.59 54.40 176.87
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2789.2429 853.6957 -3.267 0.002871 **
## at_bats 0.6305 0.1545 4.080 0.000339 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 66.47 on 28 degrees of freedom
## Multiple R-squared: 0.3729, Adjusted R-squared: 0.3505
## F-statistic: 16.65 on 1 and 28 DF, p-value: 0.0003388
m2 <- lm(runs ~ homeruns, data = mlb11)
summary(m2)
##
## Call:
## lm(formula = runs ~ homeruns, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -91.615 -33.410 3.231 24.292 104.631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 415.2389 41.6779 9.963 1.04e-10 ***
## homeruns 1.8345 0.2677 6.854 1.90e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.29 on 28 degrees of freedom
## Multiple R-squared: 0.6266, Adjusted R-squared: 0.6132
## F-statistic: 46.98 on 1 and 28 DF, p-value: 1.9e-07
plot(mlb11$runs ~ mlb11$at_bats)
abline(m1)
plot(m1$residuals ~ mlb11$at_bats)
abline(h = 0, lty = 3)
hist(m1$residuals)
qqnorm(m1$residuals)
qqline(m1$residuals)
m3 <- lm(runs ~ hits, data = mlb11)
plot(mlb11$runs ~ mlb11$hits)
abline(m3)
summary(m3)
##
## Call:
## lm(formula = runs ~ hits, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -103.718 -27.179 -5.233 19.322 140.693
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -375.5600 151.1806 -2.484 0.0192 *
## hits 0.7589 0.1071 7.085 1.04e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.23 on 28 degrees of freedom
## Multiple R-squared: 0.6419, Adjusted R-squared: 0.6292
## F-statistic: 50.2 on 1 and 28 DF, p-value: 1.043e-07
m4 <- lm(runs ~ bat_avg, data = mlb11)
summary(m4)
##
## Call:
## lm(formula = runs ~ bat_avg, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -94.676 -26.303 -5.496 28.482 131.113
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -642.8 183.1 -3.511 0.00153 **
## bat_avg 5242.2 717.3 7.308 5.88e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.23 on 28 degrees of freedom
## Multiple R-squared: 0.6561, Adjusted R-squared: 0.6438
## F-statistic: 53.41 on 1 and 28 DF, p-value: 5.877e-08
plot(mlb11$runs ~ mlb11$bat_avg)
abline(m4)
hist(m4$residuals)
plot(m4$residuals ~ mlb11$bat_avg)
abline(h = 0, lty = 3)
qqnorm(m4$residuals)
qqline(m4$residuals)
plot(mlb11$runs ~ mlb11$new_obs)
abline(m5)
hist(m5$residuals)
plot(m5$residuals ~ mlb11$new_obs)
abline(h = 0, lty = 3)
qqnorm(m5$residuals)
qqline(m5$residuals)