# Location of the OpenIntro MLB 2011 data set. Defined once, before use, so
# the download call does not carry a second copy of the URL literal.
abc <- "http://www.openintro.org/stat/data/mlb11.RData"

# Fetch the .RData file into the working directory and restore the objects
# saved in it. The rest of the script uses `mlb11` (and `plot_ss`) without
# defining them, so they are presumably supplied by this file.
download.file(abc, destfile = "mlb11.RData")
load("mlb11.RData")

# Correlation between runs scored and at-bats.
m <- cor(mlb11$runs, mlb11$at_bats)
m
## [1] 0.610627

# Scatterplot of runs against at-bats, x-axis restricted to 5200-6000.
plot(mlb11$at_bats, mlb11$runs, xlim = c(5200, 6000))
# Interactive sum-of-squares exploration of runs versus at-bats.
# `plot_ss` is not defined in this script (presumably it is loaded from
# mlb11.RData -- confirm). Per the recorded output below, it prompts the user
# to click two points to define a line and then reports a fitted line and a
# sum of squares.
plot_ss(x = mlb11$at_bats, y = mlb11$runs)
## Click two points to make a line.
## Call:
## lm(formula = y ~ x, data = pts)
##
## Coefficients:
## (Intercept) x
## -2789.2429 0.6305
##
## Sum of Squares: 123721.9
# Same exploration with showSquares = TRUE (presumably draws the squared
# residuals on the plot -- confirm against the plot_ss definition).
plot_ss(x = mlb11$at_bats, y = mlb11$runs, showSquares = TRUE)
## Click two points to make a line.
## Call:
## lm(formula = y ~ x, data = pts)
##
## Coefficients:
## (Intercept) x
## -2789.2429 0.6305
##
## Sum of Squares: 123721.9
# Repeat of the previous call (identical arguments); the recorded output is
# the same as above.
plot_ss(x = mlb11$at_bats, y = mlb11$runs, showSquares = TRUE)
## Click two points to make a line.
## Call:
## lm(formula = y ~ x, data = pts)
##
## Coefficients:
## (Intercept) x
## -2789.2429 0.6305
##
## Sum of Squares: 123721.9
# Least-squares fit of runs scored on at-bats, followed by its coefficient
# table and fit statistics (recorded output below).
m1 <- lm(formula = runs ~ at_bats, data = mlb11)
summary(object = m1)
##
## Call:
## lm(formula = runs ~ at_bats, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -125.58 -47.05 -16.59 54.40 176.87
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2789.2429 853.6957 -3.267 0.002871 **
## at_bats 0.6305 0.1545 4.080 0.000339 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 66.47 on 28 degrees of freedom
## Multiple R-squared: 0.3729, Adjusted R-squared: 0.3505
## F-statistic: 16.65 on 1 and 28 DF, p-value: 0.0003388
# Scatterplot of runs against home runs (home runs on the x-axis).
plot(mlb11$homeruns, mlb11$runs)

# Home-run model. It is stored under its own name so that it does not
# overwrite `m1` (runs ~ at_bats): the regression line, prediction and
# residual diagnostics further down are all about the at-bats model.
# NOTE(review): this regresses homeruns on runs, the reverse of the
# scatterplot above (homeruns on x, runs on y). If home runs are meant to be
# the predictor, the formula should be runs ~ homeruns -- confirm.
m_hr <- lm(homeruns ~ runs, data = mlb11)
summary(m_hr)
##
## Call:
## lm(formula = homeruns ~ runs, data = mlb11)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.067 -15.794 3.702 15.766 39.232
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -85.15663 34.79698 -2.447 0.0209 *
## runs 0.34154 0.04983 6.854 1.9e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.13 on 28 degrees of freedom
## Multiple R-squared: 0.6266, Adjusted R-squared: 0.6132
## F-statistic: 46.98 on 1 and 28 DF, p-value: 1.9e-07

# Runs against at-bats with the at-bats least-squares line (m1) overlaid.
plot(mlb11$runs ~ mlb11$at_bats)
abline(m1)

# Predicted runs for a team with 5,578 at-bats, taken from the at-bats model
# itself rather than from hand-typed coefficients.
predict(m1, newdata = data.frame(at_bats = 5578))
## With the rounded coefficients reported by summary(m1):
## -2789.2429 + 0.6305 * 5578 = 727.6861

# Residuals of the at-bats model against the predictor, with a dotted
# reference line at zero.
plot(m1$residuals ~ mlb11$at_bats)
abline(h = 0, lty = 3)
### Is there any apparent pattern in the residuals plot? What does this indicate about the linearity of the relationship between runs and at-bats?

# Distribution of the residuals: histogram and normal Q-Q plot.
hist(m1$residuals)
qqnorm(m1$residuals)
qqline(m1$residuals)
# Scatterplot with runs on the x-axis and wins on the y-axis.
# NOTE(review): the plot_ss call that follows uses wins as x and runs as y,
# i.e. the opposite orientation to this plot -- confirm which is intended.
plot(mlb11$runs, mlb11$wins)
# Interactive sum-of-squares exploration of runs versus wins. `plot_ss` is
# not defined in this script; per the recorded output it asks for two clicked
# points and then reports a fitted line and a sum of squares.
plot_ss(x = mlb11$wins, y = mlb11$runs)
## Click two points to make a line.
## Call:
## lm(formula = y ~ x, data = pts)
##
## Coefficients:
## (Intercept) x
## 342.121 4.341
##
## Sum of Squares: 126068.4
# Same exploration with hits as the x variable; the recorded sum of squares
# (70638.75) is lower than the one recorded for wins (126068.4).
plot_ss(x = mlb11$hits, y = mlb11$runs)
## Click two points to make a line.
## Call:
## lm(formula = y ~ x, data = pts)
##
## Coefficients:
## (Intercept) x
## -375.5600 0.7589
##
## Sum of Squares: 70638.75
# Least-squares fit of runs on new_obs. The model must use the same predictor
# as the scatterplot and residual plot below; fitting on at_bats here would
# make abline(m2) draw a line that does not belong to the plotted x-axis and
# would pair at-bats residuals with new_obs values.
m2 <- lm(runs ~ new_obs, data = mlb11)

# Runs against new_obs with the fitted line overlaid.
plot(x = mlb11$new_obs, y = mlb11$runs)
abline(m2)

# Coefficient table and fit statistics for the new_obs model.
summary(m2)
## NOTE: the output previously pasted here came from lm(runs ~ at_bats) and
## does not describe this model. Re-run summary(m2) to record the new_obs
## results.

# Residuals against the predictor, with a dotted reference line at zero.
plot(m2$residuals ~ mlb11$new_obs)
abline(h = 0, lty = 3)

# Distribution of the residuals: histogram and normal Q-Q plot.
hist(m2$residuals)
qqnorm(m2$residuals)
qqline(m2$residuals)