# Download the OpenIntro mlb11 data file and load its objects into the
# session.
download.file(
  "http://www.openintro.org/stat/data/mlb11.RData",
  destfile = "mlb11.RData"
)
load("mlb11.RData")

# Runs grouped by batting average.
boxplot(mlb11$runs ~ mlb11$bat_avg)

# Hits against at-bats, with the least-squares line overlaid.
plot(mlb11$at_bats, mlb11$hits, # plot the variables
     xlab = "At Bats", # x-axis label
     ylab = "Hits") # y-axis label
# abline() draws y = a + b * x in the coordinates of the current plot, so the
# model's response has to be the y-axis variable (hits) and its predictor the
# x-axis variable (at_bats).
reg1 <- lm(hits ~ at_bats, data = mlb11)
abline(reg1)

# Runs against at-bats, followed by their correlation coefficient.
plot(mlb11$at_bats, mlb11$runs, # plot the variables
     xlab = "At Bats", # x-axis label
     ylab = "Runs") # y-axis label
cor(mlb11$runs, mlb11$at_bats)
## [1] 0.610627
# Sum-of-squares plot for runs against at-bats, called with
# showSquares = TRUE. The recorded output below reports the fitted line and
# its sum of squares.
# NOTE(review): plot_ss is not defined in this script; presumably it is one of
# the objects loaded from mlb11.RData -- confirm.
plot_ss(x = mlb11$at_bats, y = mlb11$runs, showSquares = TRUE)
## Click two points to make a line.
## Call:
## lm(formula = y ~ x, data = pts)
##
## Coefficients:
## (Intercept) x
## -2789.2429 0.6305
##
## Sum of Squares: 123721.9
# m1: simple linear regression of runs on at_bats.
m1 <- lm(runs ~ at_bats, data = mlb11)
summary(m1)
##
## Call:
## lm(formula = runs ~ at_bats, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -125.58 -47.05 -16.59 54.40 176.87
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2789.2429 853.6957 -3.267 0.002871 **
## at_bats 0.6305 0.1545 4.080 0.000339 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 66.47 on 28 degrees of freedom
## Multiple R-squared: 0.3729, Adjusted R-squared: 0.3505
## F-statistic: 16.65 on 1 and 28 DF, p-value: 0.0003388
# m2: simple linear regression of runs on homeruns.
m2 <- lm(runs ~ homeruns, data = mlb11)
summary(m2)
##
## Call:
## lm(formula = runs ~ homeruns, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -91.615 -33.410 3.231 24.292 104.631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 415.2389 41.6779 9.963 1.04e-10 ***
## homeruns 1.8345 0.2677 6.854 1.90e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.29 on 28 degrees of freedom
## Multiple R-squared: 0.6266, Adjusted R-squared: 0.6132
## F-statistic: 46.98 on 1 and 28 DF, p-value: 1.9e-07
# Scatterplot of runs against at-bats with the m1 regression line overlaid.
plot(mlb11$runs ~ mlb11$at_bats)
abline(m1)
summary(m1)
##
## Call:
## lm(formula = runs ~ at_bats, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -125.58 -47.05 -16.59 54.40 176.87
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2789.2429 853.6957 -3.267 0.002871 **
## at_bats 0.6305 0.1545 4.080 0.000339 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 66.47 on 28 degrees of freedom
## Multiple R-squared: 0.3729, Adjusted R-squared: 0.3505
## F-statistic: 16.65 on 1 and 28 DF, p-value: 0.0003388
# Manual prediction of runs at at_bats = 5578, using the rounded intercept and
# slope printed by summary(m1) above; rounding the slope means the result can
# differ slightly from what the fitted model itself would predict.
-2789.2429 + (0.6305*5578)
## [1] 727.6861
# Residuals of m1 plotted against at-bats, with a zero reference line.
plot(m1$residuals ~ mlb11$at_bats)
abline(h = 0, lty = 3) # adds a horizontal dotted line (lty = 3) at y = 0
# Sum-of-squares plot for runs against batting average, called with
# showSquares = TRUE. The coefficients in the recorded output below match the
# m3 fit (runs ~ bat_avg) summarised later in this script.
# NOTE(review): plot_ss is not defined in this script; presumably it is one of
# the objects loaded from mlb11.RData -- confirm.
plot_ss(x = mlb11$bat_avg, y = mlb11$runs, showSquares = TRUE)
## Click two points to make a line.
## Call:
## lm(formula = y ~ x, data = pts)
##
## Coefficients:
## (Intercept) x
## -642.8 5242.2
##
## Sum of Squares: 67849.52
# Distribution checks on the m1 residuals: a histogram, then a normal
# probability plot with its reference line.
# The hist() argument is left as written because the default plot title is
# built from that expression.
hist(m1$residuals)
qqnorm(resid(m1))
qqline(resid(m1)) # reference line through the normal probability plot
# m3: simple linear regression of runs on bat_avg, drawn over its scatterplot.
m3 <- lm(runs ~ bat_avg, data = mlb11)
plot(mlb11$runs ~ mlb11$bat_avg)
abline(m3)
# m4: simple linear regression of runs on stolen_bases, drawn over its
# scatterplot.
m4 <- lm(runs ~ stolen_bases, data = mlb11)
plot(mlb11$runs ~ mlb11$stolen_bases)
abline(m4)
# Summaries of m1 (at_bats), m3 (bat_avg) and m4 (stolen_bases) printed one
# after another so their fits can be compared.
summary(m1)
##
## Call:
## lm(formula = runs ~ at_bats, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -125.58 -47.05 -16.59 54.40 176.87
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2789.2429 853.6957 -3.267 0.002871 **
## at_bats 0.6305 0.1545 4.080 0.000339 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 66.47 on 28 degrees of freedom
## Multiple R-squared: 0.3729, Adjusted R-squared: 0.3505
## F-statistic: 16.65 on 1 and 28 DF, p-value: 0.0003388
summary(m3)
##
## Call:
## lm(formula = runs ~ bat_avg, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -94.676 -26.303 -5.496 28.482 131.113
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -642.8 183.1 -3.511 0.00153 **
## bat_avg 5242.2 717.3 7.308 5.88e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.23 on 28 degrees of freedom
## Multiple R-squared: 0.6561, Adjusted R-squared: 0.6438
## F-statistic: 53.41 on 1 and 28 DF, p-value: 5.877e-08
summary(m4)
##
## Call:
## lm(formula = runs ~ stolen_bases, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -139.94 -62.87 10.01 38.54 182.49
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 677.3074 58.9751 11.485 4.17e-12 ***
## stolen_bases 0.1491 0.5211 0.286 0.777
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 83.82 on 28 degrees of freedom
## Multiple R-squared: 0.002914, Adjusted R-squared: -0.0327
## F-statistic: 0.08183 on 1 and 28 DF, p-value: 0.7769
# One single-predictor regression of runs per candidate variable.
# m1 to m4 repeat, with the same formulas, fits made earlier in this script;
# m5 to m7 (strikeouts, hits, wins) are fitted here for the first time.
m1 <- lm(runs ~ at_bats, data = mlb11)
m2 <- lm(runs ~ homeruns, data = mlb11)
m3 <- lm(runs ~ bat_avg, data = mlb11)
m4 <- lm(runs ~ stolen_bases, data = mlb11)
m5 <- lm(runs ~ strikeouts, data = mlb11)
m6 <- lm(runs ~ hits, data = mlb11)
m7 <- lm(runs ~ wins, data = mlb11)
# Summary of the bat_avg model (m3).
summary(m3)
##
## Call:
## lm(formula = runs ~ bat_avg, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -94.676 -26.303 -5.496 28.482 131.113
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -642.8 183.1 -3.511 0.00153 **
## bat_avg 5242.2 717.3 7.308 5.88e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.23 on 28 degrees of freedom
## Multiple R-squared: 0.6561, Adjusted R-squared: 0.6438
## F-statistic: 53.41 on 1 and 28 DF, p-value: 5.877e-08
# Fit the models for the three newer statistics before drawing their lines;
# abline() needs the fitted objects m8, m9 and m10 to exist.
# Each model is paired with the variable of the scatterplot it is drawn on:
# m8 with new_onbase, m9 with new_slug, m10 with new_obs.
m8 <- lm(runs ~ new_onbase, data = mlb11)
m9 <- lm(runs ~ new_slug, data = mlb11)
m10 <- lm(runs ~ new_obs, data = mlb11)

# Scatterplot of runs against each newer statistic with its regression line.
plot(mlb11$runs ~ mlb11$new_obs)
abline(m10)
plot(mlb11$runs ~ mlb11$new_slug)
abline(m9)
plot(mlb11$runs ~ mlb11$new_onbase)
abline(m8)
library(xtable)
## Warning: package 'xtable' was built under R version 3.6.3
# R-squared ranking of the single-predictor models of runs.
# mset_var k identifies model m<k>; see the legend below the printed table.
# The rows are ordered by R-squared in code, so the rank column always agrees
# with the values instead of relying on hand-ordered input.
d <- data.frame(
  mset_var = c(10, 9, 8, 3, 6, 2, 7, 1, 5, 4),
  Rsquared = c(0.9349, 0.8969, 0.8491, 0.6561, 0.6419,
               0.6266, 0.361, 0.3729, 0.1694, 0.0029)
)
d <- d[order(d$Rsquared, decreasing = TRUE), ]
# rank is kept as a double so xtable formats it like the other numeric columns.
d <- data.frame(rank = as.numeric(seq_len(nrow(d))), d, row.names = NULL)
d
## rank mset_var Rsquared
## 1 1 10 0.9349
## 2 2 9 0.8969
## 3 3 8 0.8491
## 4 4 3 0.6561
## 5 5 6 0.6419
## 6 6 2 0.6266
## 7 7 1 0.3729
## 8 8 7 0.3610
## 9 9 5 0.1694
## 10 10 4 0.0029
# mset_var 10 = new onbase with slugging
# mset_var 9 = slugging
# mset_var 8 = onbase
# mset_var 3 = bat_avg
# mset_var 6 = hits
# mset_var 2 = homeruns
# mset_var 1 = at_bats
# mset_var 7 = wins
# mset_var 5 = strikeouts
# mset_var 4 = stolen_bases
# Render the ranking as a LaTeX table.
xtable(d)
## % latex table generated in R 3.6.2 by xtable 1.8-4 package
## % Sun Apr 12 09:57:23 2020
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrr}
## \hline
## & rank & mset\_var & Rsquared \\
## \hline
## 1 & 1.00 & 10.00 & 0.93 \\
## 2 & 2.00 & 9.00 & 0.90 \\
## 3 & 3.00 & 8.00 & 0.85 \\
## 4 & 4.00 & 3.00 & 0.66 \\
## 5 & 5.00 & 6.00 & 0.64 \\
## 6 & 6.00 & 2.00 & 0.63 \\
## 7 & 7.00 & 1.00 & 0.37 \\
## 8 & 8.00 & 7.00 & 0.36 \\
## 9 & 9.00 & 5.00 & 0.17 \\
## 10 & 10.00 & 4.00 & 0.00 \\
## \hline
## \end{tabular}
## \end{table}
# R-Squared table
# Normal probability plot of the m10 residuals with its reference line.
# m10 must have been fitted before this point.
qqnorm(resid(m10))
qqline(resid(m10)) # reference line through the normal probability plot