# Install ISLR only when it is not already available. Calling
# install.packages() on a package that is currently loaded triggers the
# "Updating loaded packages" error/prompt and re-downloads on every run.
if (!requireNamespace("ISLR", quietly = TRUE)) {
  install.packages("ISLR")
}
Error in install.packages : Updating loaded packages
library(ISLR)     # Hitters data set
library(ggplot2)  # ggplot(), aes(), geom_point(), geom_smooth()
library(leaps)    # regsubsets()
# Inspect the structure (column names, types, first values) of the raw data.
str(Hitters)
'data.frame': 322 obs. of 20 variables:
$ AtBat : int 293 315 479 496 321 594 185 298 323 401 ...
$ Hits : int 66 81 130 141 87 169 37 73 81 92 ...
$ HmRun : int 1 7 18 20 10 4 1 0 6 17 ...
$ Runs : int 30 24 66 65 39 74 23 24 26 49 ...
$ RBI : int 29 38 72 78 42 51 8 24 32 66 ...
$ Walks : int 14 39 76 37 30 35 21 7 8 65 ...
$ Years : int 1 14 3 11 2 11 2 3 2 13 ...
$ CAtBat : int 293 3449 1624 5628 396 4408 214 509 341 5206 ...
$ CHits : int 66 835 457 1575 101 1133 42 108 86 1332 ...
$ CHmRun : int 1 69 63 225 12 19 1 0 6 253 ...
$ CRuns : int 30 321 224 828 48 501 30 41 32 784 ...
$ CRBI : int 29 414 266 838 46 336 9 37 34 890 ...
$ CWalks : int 14 375 263 354 33 194 24 12 8 866 ...
$ League : Factor w/ 2 levels "A","N": 1 2 1 2 2 1 2 1 2 1 ...
$ Division : Factor w/ 2 levels "E","W": 1 2 2 1 1 2 1 2 2 1 ...
$ PutOuts : int 446 632 880 200 805 282 76 121 143 0 ...
$ Assists : int 33 43 82 11 40 421 127 283 290 0 ...
$ Errors : int 20 10 14 3 4 25 7 9 19 0 ...
$ Salary : num NA 475 480 500 91.5 750 70 100 75 1100 ...
$ NewLeague: Factor w/ 2 levels "A","N": 1 2 1 2 2 1 1 1 2 1 ...
# Look at the Salary column on its own; note that it contains NA values.
str(Hitters$Salary)
num [1:322] NA 475 480 500 91.5 750 70 100 75 1100 ...
# Look at the Hits column on its own.
str(Hitters$Hits)
int [1:322] 66 81 130 141 87 169 37 73 81 92 ...
# Drop every row that contains a missing value (322 -> 263 observations).
Hitters_Fixed <- na.omit(Hitters)

# Simple linear regression of Salary on Hits using the cleaned data.
reg_out <- lm(Salary ~ Hits, data = Hitters_Fixed)

# Show the call and the fitted coefficients.
print(reg_out)
Call:
lm(formula = Salary ~ Hits, data = Hitters_Fixed)
Coefficients:
(Intercept) Hits
63.049 4.385
# Full model summary: residual quantiles, coefficient table, R-squared, F-test.
summary(reg_out)
Call:
lm(formula = Salary ~ Hits, data = Hitters_Fixed)
Residuals:
Min 1Q Median 3Q Max
-893.99 -245.63 -59.08 181.12 2059.90
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 63.0488 64.9822 0.970 0.333
Hits 4.3854 0.5561 7.886 8.53e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 406.2 on 261 degrees of freedom
Multiple R-squared: 0.1924, Adjusted R-squared: 0.1893
F-statistic: 62.19 on 1 and 261 DF, p-value: 8.531e-14
# Histogram of Salary in the cleaned data (title and axis label are derived
# from the argument expression, so it is left exactly as written).
hist(Hitters_Fixed$Salary)


par(mfrow = c(rows, cols)) divides the Plots window into the number of rows and columns specified in the brackets.
For example, par(mfrow=c(2,2)) divides it up into two rows and two columns.


Plot the data points on a graph
x = independent variable
y = dependent variable
# Scatter plot matching the fitted model Salary ~ Hits: the independent
# variable (Hits) goes on the x-axis and the dependent variable (Salary) on
# the y-axis. The original mapping had the two axes swapped.
salary.graph <- ggplot(Hitters_Fixed, aes(x = Hits, y = Salary)) +
  geom_point()
salary.graph

Add the linear regression line to the plotted data
Add the regression line using geom_smooth(), specifying "lm" as the method for creating the line.
This will add the fitted linear regression line as well as its confidence band
(by default the 95% confidence interval around the fit) as a light grey stripe surrounding the blue line:
# Overlay a least-squares line (y ~ x of the plot's own aesthetics), drawn in
# blue, on top of the existing scatter plot, then redraw the plot.
salary.graph <- salary.graph +
  geom_smooth(method = "lm", colour = "blue")
salary.graph

# Inspect the cleaned data; the na.action attribute lists the omitted rows.
str(Hitters_Fixed)
'data.frame': 263 obs. of 20 variables:
$ AtBat : int 315 479 496 321 594 185 298 323 401 574 ...
$ Hits : int 81 130 141 87 169 37 73 81 92 159 ...
$ HmRun : int 7 18 20 10 4 1 0 6 17 21 ...
$ Runs : int 24 66 65 39 74 23 24 26 49 107 ...
$ RBI : int 38 72 78 42 51 8 24 32 66 75 ...
$ Walks : int 39 76 37 30 35 21 7 8 65 59 ...
$ Years : int 14 3 11 2 11 2 3 2 13 10 ...
$ CAtBat : int 3449 1624 5628 396 4408 214 509 341 5206 4631 ...
$ CHits : int 835 457 1575 101 1133 42 108 86 1332 1300 ...
$ CHmRun : int 69 63 225 12 19 1 0 6 253 90 ...
$ CRuns : int 321 224 828 48 501 30 41 32 784 702 ...
$ CRBI : int 414 266 838 46 336 9 37 34 890 504 ...
$ CWalks : int 375 263 354 33 194 24 12 8 866 488 ...
$ League : Factor w/ 2 levels "A","N": 2 1 2 2 1 2 1 2 1 1 ...
$ Division : Factor w/ 2 levels "E","W": 2 2 1 1 2 1 2 2 1 1 ...
$ PutOuts : int 632 880 200 805 282 76 121 143 0 238 ...
$ Assists : int 43 82 11 40 421 127 283 290 0 445 ...
$ Errors : int 10 14 3 4 25 7 9 19 0 22 ...
$ Salary : num 475 480 500 91.5 750 ...
$ NewLeague: Factor w/ 2 levels "A","N": 2 1 2 2 1 1 1 2 1 1 ...
- attr(*, "na.action")= 'omit' Named int [1:59] 1 16 19 23 31 33 37 39 40 42 ...
..- attr(*, "names")= chr [1:59] "-Andy Allanson" "-Billy Beane" "-Bruce Bochte" "-Bob Boone" ...
# Multiple regression of Salary on every other column of Hitters_Fixed
# ("." in the formula expands to all remaining variables).
mr_out <- lm(formula = Salary ~ ., data = Hitters_Fixed)
summary(mr_out)
Call:
lm(formula = Salary ~ ., data = Hitters_Fixed)
Residuals:
Min 1Q Median 3Q Max
-907.62 -178.35 -31.11 139.09 1877.04
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 163.10359 90.77854 1.797 0.073622 .
AtBat -1.97987 0.63398 -3.123 0.002008 **
Hits 7.50077 2.37753 3.155 0.001808 **
HmRun 4.33088 6.20145 0.698 0.485616
Runs -2.37621 2.98076 -0.797 0.426122
RBI -1.04496 2.60088 -0.402 0.688204
Walks 6.23129 1.82850 3.408 0.000766 ***
Years -3.48905 12.41219 -0.281 0.778874
CAtBat -0.17134 0.13524 -1.267 0.206380
CHits 0.13399 0.67455 0.199 0.842713
CHmRun -0.17286 1.61724 -0.107 0.914967
CRuns 1.45430 0.75046 1.938 0.053795 .
CRBI 0.80771 0.69262 1.166 0.244691
CWalks -0.81157 0.32808 -2.474 0.014057 *
LeagueN 62.59942 79.26140 0.790 0.430424
DivisionW -116.84925 40.36695 -2.895 0.004141 **
PutOuts 0.28189 0.07744 3.640 0.000333 ***
Assists 0.37107 0.22120 1.678 0.094723 .
Errors -3.36076 4.39163 -0.765 0.444857
NewLeagueN -24.76233 79.00263 -0.313 0.754218
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 315.6 on 243 degrees of freedom
Multiple R-squared: 0.5461, Adjusted R-squared: 0.5106
F-statistic: 15.39 on 19 and 243 DF, p-value: < 2.2e-16
# Best-subset selection for Salary over all predictors, keeping the best model
# of each size up to 18 variables. The assignment was missing from the script,
# leaving best_out undefined; it is restored from the Call shown in the output.
best_out <- regsubsets(Salary ~ ., data = Hitters_Fixed, nvmax = 18)
summary(best_out)
Subset selection object
Call: regsubsets.formula(Salary ~ ., data = Hitters_Fixed, nvmax = 18)
19 Variables (and intercept)
Forced in Forced out
AtBat FALSE FALSE
Hits FALSE FALSE
HmRun FALSE FALSE
Runs FALSE FALSE
RBI FALSE FALSE
Walks FALSE FALSE
Years FALSE FALSE
CAtBat FALSE FALSE
CHits FALSE FALSE
CHmRun FALSE FALSE
CRuns FALSE FALSE
CRBI FALSE FALSE
CWalks FALSE FALSE
LeagueN FALSE FALSE
DivisionW FALSE FALSE
PutOuts FALSE FALSE
Assists FALSE FALSE
Errors FALSE FALSE
NewLeagueN FALSE FALSE
1 subsets of each size up to 18
Selection Algorithm: exhaustive
AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
1 ( 1 ) " " " " " " " " " " " " " " " " " " " " " " "*" " " " " " " " " " " " " " "
2 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " " "*" " " " " " " " " " " " " " "
3 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " " "*" " " " " " " "*" " " " " " "
4 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " " "*" " " " " "*" "*" " " " " " "
5 ( 1 ) "*" "*" " " " " " " " " " " " " " " " " " " "*" " " " " "*" "*" " " " " " "
6 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " " " "*" " " " " "*" "*" " " " " " "
7 ( 1 ) " " "*" " " " " " " "*" " " "*" "*" "*" " " " " " " " " "*" "*" " " " " " "
8 ( 1 ) "*" "*" " " " " " " "*" " " " " " " "*" "*" " " "*" " " "*" "*" " " " " " "
9 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*" "*" "*" " " "*" "*" " " " " " "
10 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*" "*" "*" " " "*" "*" "*" " " " "
11 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*" "*" "*" "*" "*" "*" "*" " " " "
12 ( 1 ) "*" "*" " " "*" " " "*" " " "*" " " " " "*" "*" "*" "*" "*" "*" "*" " " " "
13 ( 1 ) "*" "*" " " "*" " " "*" " " "*" " " " " "*" "*" "*" "*" "*" "*" "*" "*" " "
14 ( 1 ) "*" "*" "*" "*" " " "*" " " "*" " " " " "*" "*" "*" "*" "*" "*" "*" "*" " "
15 ( 1 ) "*" "*" "*" "*" " " "*" " " "*" "*" " " "*" "*" "*" "*" "*" "*" "*" "*" " "
16 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" " " "*" "*" "*" "*" "*" "*" "*" "*" " "
17 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" " " "*" "*" "*" "*" "*" "*" "*" "*" "*"
18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" " " "*" "*" "*" "*" "*" "*" "*" "*" "*"
# Adjusted R-squared of the best model of each size, rounded to 3 decimals.
round(summary(best_out)[["adjr2"]], digits = 3)
[1] 0.319 0.421 0.445 0.467 0.481 0.497 0.501 0.514 0.518 0.522 0.523 0.522 0.521 0.520 0.518 0.516 0.514 0.513
# Simple linear regression of Salary on PutOuts, summarised in one step.
summary(
  lm(Salary ~ PutOuts, data = Hitters_Fixed)
)
Call:
lm(formula = Salary ~ PutOuts, data = Hitters_Fixed)
Residuals:
Min 1Q Median 3Q Max
-893.66 -314.08 -71.43 204.19 1857.55
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 395.15532 38.36164 10.30 < 2e-16 ***
PutOuts 0.48423 0.09514 5.09 6.87e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 431.1 on 261 degrees of freedom
Multiple R-squared: 0.09029, Adjusted R-squared: 0.0868
F-statistic: 25.9 on 1 and 261 DF, p-value: 6.871e-07
# Coefficients of the best 18-variable model found by the subset search.
coef(best_out, 18)
(Intercept) AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CRuns CRBI
163.0837964 -1.9793878 7.4449895 4.0330423 -2.2712697 -0.9623673 6.2054965 -3.4272056 -0.1746122 0.1835880 1.4015966 0.7386996
CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
-0.8017228 63.1230544 -116.8591659 0.2822423 0.3731875 -3.3891302 -25.3135587
# R-squared of the best 18-variable model.
summary(best_out)[["rsq"]][18]
[1] 0.5460945
LS0tDQp0aXRsZTogIkFzc2lnbm1lbnQgNCBSIE5vdGVib29rIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJJU0xSIikNCmBgYA0KYGBge3J9DQpsaWJyYXJ5KElTTFIpDQpgYGANCg0KDQpgYGB7cn0NCnN0cihIaXR0ZXJzKQ0KYGBgDQpgYGB7cn0NCnN0cihIaXR0ZXJzJFNhbGFyeSkNCmBgYA0KYGBge3J9DQpzdHIoSGl0dGVycyRIaXRzKQ0KYGBgDQoNCg0KYGBge3J9DQpIaXR0ZXJzX0ZpeGVkID1uYS5vbWl0KEhpdHRlcnMpDQpgYGANCg0KYGBge3J9DQpyZWdfb3V0IDwtIGxtKFNhbGFyeX5IaXRzLCBkYXRhID0gSGl0dGVyc19GaXhlZCkNCmBgYA0KDQpgYGB7cn0NCnJlZ19vdXQNCmBgYA0KDQpgYGB7cn0NCnN1bW1hcnkocmVnX291dCkNCmBgYA0KYGBge3J9DQpoaXN0KEhpdHRlcnNfRml4ZWQkU2FsYXJ5KQ0KYGBgDQoNCmBgYHtyfQ0KaGlzdChIaXR0ZXJzX0ZpeGVkJEhpdHMpDQpgYGANCmBgYHtyfQ0KcGxvdChTYWxhcnkgfiBIaXRzLCBkYXRhID0gSGl0dGVyc19GaXhlZCApDQpgYGANCiMgRGl2aWRlcyB0aGUgUGxvdHMgd2luZG93cyBpbnRvIHRoZSBudW1iZXIgb2Ygcm93cyBhbmQgY29sdW1ucyBzcGVjaWZpZWQgaW4gdGhlIGJyYWNrZXRzLg0KIyBwYXIobWZyb3c9YygyLDIpKSBkaXZpZGVzIGl0IHVwIGludG8gdHdvIHJvd3MgYW5kIHR3byBjb2x1bW5zDQpgYGB7cn0NCnBhcihtZnJvdz1jKDIsMikpDQpgYGANCg0KYGBge3J9DQpwbG90KFNhbGFyeSB+IEhpdHMsIGRhdGEgPSBIaXR0ZXJzX0ZpeGVkICkNCmBgYA0KDQpgYGB7cn0NCnBhcihtZnJvdz1jKDEsMSkpDQpgYGANCg0KYGBge3J9DQpwbG90KFNhbGFyeSB+IEhpdHMsIGRhdGEgPSBIaXR0ZXJzX0ZpeGVkICkNCmBgYA0KIyBQbG90IHRoZSBkYXRhIHBvaW50cyBvbiBhIGdyYXBoDQojIHggPSBpbmRlcGVuZGVudCB2YXJpYWJsZQ0KIyB5ID0gZGVwZW5kZW50IHZhcmlhYmxlDQpgYGB7cn0NCnNhbGFyeS5ncmFwaDwtZ2dwbG90KEhpdHRlcnNfRml4ZWQsIGFlcyh4PVNhbGFyeSwgeT1IaXRzKSkgKyBnZW9tX3BvaW50KCkNCnNhbGFyeS5ncmFwaA0KYGBgDQoNCiMgQWRkIHRoZSBsaW5lYXIgcmVncmVzc2lvbiBsaW5lIHRvIHRoZSBwbG90dGVkIGRhdGENCiMgQWRkIHRoZSByZWdyZXNzaW9uIGxpbmUgdXNpbmcgZ2VvbV9zbW9vdGgoKSBhbmQgdHlwaW5nIGluIGxtIGFzIHlvdXIgbWV0aG9kIGZvciBjcmVhdGluZyB0aGUgbGluZS4gDQojIFRoaXMgd2lsbCBhZGQgdGhlIGxpbmUgb2YgdGhlIGxpbmVhciByZWdyZXNzaW9uIGFzIHdlbGwgYXMgdGhlIHN0YW5kYXJkIGVycm9yIG9mIA0KIyB0aGUgZXN0aW1hdGUgKGluIHRoaXMgY2FzZSArLy0gMC4wMSkgYXMgYSBsaWdodCBncmV5IHN0cmlwZSBzdXJyb3VuZGluZyB0aGUgQmx1ZSBsaW5lOg0KDQpgYGB7cn0NCg0Kc2FsYXJ5LmdyYXBoIDwtIHNhbGFyeS5ncmFwaCArIGdlb21fc21vb3RoKG1ldGhvZD0ibG0iLCBjb2w9ImJsdWUiKQ0KDQpzYWxhcnku
Z3JhcGgNCmBgYA0KDQpgYGB7cn0NCnN0cihIaXR0ZXJzX0ZpeGVkKQ0KYGBgDQpgYGB7cn0NCm1yX291dCA8LSBsbShTYWxhcnl+LiwgSGl0dGVyc19GaXhlZCkNCnN1bW1hcnkobXJfb3V0KQ0KYGBgDQpgYGB7cn0NCmJlc3Rfb3V0IDwtIHJlZ3N1YnNldHMoU2FsYXJ5fi4sZGF0YSA9IEhpdHRlcnNfRml4ZWQsIG52bWF4ID0gMTgpDQpzdW1tYXJ5KGJlc3Rfb3V0KQ0KYGBgDQoNCg0KYGBge3J9DQpyb3VuZChzdW1tYXJ5KGJlc3Rfb3V0KSRhZGpyMiwzKQ0KYGBgDQpgYGB7cn0NCnN1bW1hcnkobG0oU2FsYXJ5flB1dE91dHMsZGF0YT1IaXR0ZXJzX0ZpeGVkKSkNCmBgYA0KYGBge3J9DQpjb2VmKChiZXN0X291dCksMTgpDQpgYGANCg0KYGBge3J9DQpzdW1tYXJ5KGJlc3Rfb3V0KSRyc3FbMThdDQpgYGANCg0K