# Install ISLR only when it is missing: calling install.packages() for a
# package that is already loaded in the session fails with
# "Updating loaded packages".
if (!requireNamespace("ISLR", quietly = TRUE)) {
  install.packages("ISLR")
}
library(ggplot2)
library(ISLR)
library(leaps)
# Show the structure of the Hitters data frame: its dimensions, each column's
# type, and the first few values of every column.
str(Hitters)
'data.frame':   322 obs. of  20 variables:
 $ AtBat    : int  293 315 479 496 321 594 185 298 323 401 ...
 $ Hits     : int  66 81 130 141 87 169 37 73 81 92 ...
 $ HmRun    : int  1 7 18 20 10 4 1 0 6 17 ...
 $ Runs     : int  30 24 66 65 39 74 23 24 26 49 ...
 $ RBI      : int  29 38 72 78 42 51 8 24 32 66 ...
 $ Walks    : int  14 39 76 37 30 35 21 7 8 65 ...
 $ Years    : int  1 14 3 11 2 11 2 3 2 13 ...
 $ CAtBat   : int  293 3449 1624 5628 396 4408 214 509 341 5206 ...
 $ CHits    : int  66 835 457 1575 101 1133 42 108 86 1332 ...
 $ CHmRun   : int  1 69 63 225 12 19 1 0 6 253 ...
 $ CRuns    : int  30 321 224 828 48 501 30 41 32 784 ...
 $ CRBI     : int  29 414 266 838 46 336 9 37 34 890 ...
 $ CWalks   : int  14 375 263 354 33 194 24 12 8 866 ...
 $ League   : Factor w/ 2 levels "A","N": 1 2 1 2 2 1 2 1 2 1 ...
 $ Division : Factor w/ 2 levels "E","W": 1 2 2 1 1 2 1 2 2 1 ...
 $ PutOuts  : int  446 632 880 200 805 282 76 121 143 0 ...
 $ Assists  : int  33 43 82 11 40 421 127 283 290 0 ...
 $ Errors   : int  20 10 14 3 4 25 7 9 19 0 ...
 $ Salary   : num  NA 475 480 500 91.5 750 70 100 75 1100 ...
 $ NewLeague: Factor w/ 2 levels "A","N": 1 2 1 2 2 1 1 1 2 1 ...
# Inspect the Salary column on its own (the response used in the models below).
str(Hitters[["Salary"]])
 num [1:322] NA 475 480 500 91.5 750 70 100 75 1100 ...
# Inspect the Hits column on its own (the predictor of the simple regression).
str(Hitters[["Hits"]])
 int [1:322] 66 81 130 141 87 169 37 73 81 92 ...
# Drop every row of Hitters that contains a missing value.
Hitters_Fixed <- na.omit(Hitters)

# Simple linear regression of Salary on Hits using the NA-free data.
reg_out <- lm(formula = Salary ~ Hits, data = Hitters_Fixed)

# Print the call and the fitted coefficients.
print(reg_out)

Call:
lm(formula = Salary ~ Hits, data = Hitters_Fixed)

Coefficients:
(Intercept)         Hits  
     63.049        4.385  
# Detailed report for the Salary ~ Hits fit: residual quantiles, coefficient
# table with standard errors and p-values, R-squared and the F-statistic.
summary(reg_out)

Call:
lm(formula = Salary ~ Hits, data = Hitters_Fixed)

Residuals:
    Min      1Q  Median      3Q     Max 
-893.99 -245.63  -59.08  181.12 2059.90 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  63.0488    64.9822   0.970    0.333    
Hits          4.3854     0.5561   7.886 8.53e-14 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 406.2 on 261 degrees of freedom
Multiple R-squared:  0.1924,    Adjusted R-squared:  0.1893 
F-statistic: 62.19 on 1 and 261 DF,  p-value: 8.531e-14
# Histogram of the Salary column of the NA-free data.
hist(Hitters_Fixed$Salary)

par(mfrow = c(rows, columns)) divides the Plots window into the number of rows and columns specified in the brackets.

For example, par(mfrow=c(2,2)) divides it up into two rows and two columns.

Plot the data points on a graph

x = independent variable

y = dependent variable

# Scatter plot of the regression data. Hits is the independent variable, so it
# goes on the x-axis; Salary is the dependent variable, so it goes on the
# y-axis. This matches the fitted model lm(Salary ~ Hits).
salary.graph <- ggplot(Hitters_Fixed, aes(x = Hits, y = Salary)) +
  geom_point()
salary.graph

Add the linear regression line to the plotted data

Add the regression line using geom_smooth() and typing in lm as your method for creating the line.

This will add the line of the linear regression together with its confidence band (geom_smooth() draws a 95% confidence interval by default), shown as a light grey stripe surrounding the blue line:


# Overlay a least-squares line (drawn in blue) on the existing scatter plot,
# then display the updated plot.
salary.graph <- salary.graph +
  geom_smooth(method = "lm", colour = "blue")
salary.graph

# Re-check the structure after na.omit(): the "na.action" attribute at the end
# of the output lists the rows that were dropped.
str(Hitters_Fixed)
'data.frame':   263 obs. of  20 variables:
 $ AtBat    : int  315 479 496 321 594 185 298 323 401 574 ...
 $ Hits     : int  81 130 141 87 169 37 73 81 92 159 ...
 $ HmRun    : int  7 18 20 10 4 1 0 6 17 21 ...
 $ Runs     : int  24 66 65 39 74 23 24 26 49 107 ...
 $ RBI      : int  38 72 78 42 51 8 24 32 66 75 ...
 $ Walks    : int  39 76 37 30 35 21 7 8 65 59 ...
 $ Years    : int  14 3 11 2 11 2 3 2 13 10 ...
 $ CAtBat   : int  3449 1624 5628 396 4408 214 509 341 5206 4631 ...
 $ CHits    : int  835 457 1575 101 1133 42 108 86 1332 1300 ...
 $ CHmRun   : int  69 63 225 12 19 1 0 6 253 90 ...
 $ CRuns    : int  321 224 828 48 501 30 41 32 784 702 ...
 $ CRBI     : int  414 266 838 46 336 9 37 34 890 504 ...
 $ CWalks   : int  375 263 354 33 194 24 12 8 866 488 ...
 $ League   : Factor w/ 2 levels "A","N": 2 1 2 2 1 2 1 2 1 1 ...
 $ Division : Factor w/ 2 levels "E","W": 2 2 1 1 2 1 2 2 1 1 ...
 $ PutOuts  : int  632 880 200 805 282 76 121 143 0 238 ...
 $ Assists  : int  43 82 11 40 421 127 283 290 0 445 ...
 $ Errors   : int  10 14 3 4 25 7 9 19 0 22 ...
 $ Salary   : num  475 480 500 91.5 750 ...
 $ NewLeague: Factor w/ 2 levels "A","N": 2 1 2 2 1 1 1 2 1 1 ...
 - attr(*, "na.action")= 'omit' Named int [1:59] 1 16 19 23 31 33 37 39 40 42 ...
  ..- attr(*, "names")= chr [1:59] "-Andy Allanson" "-Billy Beane" "-Bruce Bochte" "-Bob Boone" ...
# Multiple regression of Salary on every other column of the NA-free data,
# followed by the full model summary.
mr_out <- lm(formula = Salary ~ ., data = Hitters_Fixed)
summary(mr_out)

Call:
lm(formula = Salary ~ ., data = Hitters_Fixed)

Residuals:
    Min      1Q  Median      3Q     Max 
-907.62 -178.35  -31.11  139.09 1877.04 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  163.10359   90.77854   1.797 0.073622 .  
AtBat         -1.97987    0.63398  -3.123 0.002008 ** 
Hits           7.50077    2.37753   3.155 0.001808 ** 
HmRun          4.33088    6.20145   0.698 0.485616    
Runs          -2.37621    2.98076  -0.797 0.426122    
RBI           -1.04496    2.60088  -0.402 0.688204    
Walks          6.23129    1.82850   3.408 0.000766 ***
Years         -3.48905   12.41219  -0.281 0.778874    
CAtBat        -0.17134    0.13524  -1.267 0.206380    
CHits          0.13399    0.67455   0.199 0.842713    
CHmRun        -0.17286    1.61724  -0.107 0.914967    
CRuns          1.45430    0.75046   1.938 0.053795 .  
CRBI           0.80771    0.69262   1.166 0.244691    
CWalks        -0.81157    0.32808  -2.474 0.014057 *  
LeagueN       62.59942   79.26140   0.790 0.430424    
DivisionW   -116.84925   40.36695  -2.895 0.004141 ** 
PutOuts        0.28189    0.07744   3.640 0.000333 ***
Assists        0.37107    0.22120   1.678 0.094723 .  
Errors        -3.36076    4.39163  -0.765 0.444857    
NewLeagueN   -24.76233   79.00263  -0.313 0.754218    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 315.6 on 243 degrees of freedom
Multiple R-squared:  0.5461,    Adjusted R-squared:  0.5106 
F-statistic: 15.39 on 19 and 243 DF,  p-value: < 2.2e-16
# Best-subset selection (exhaustive search) of Salary on all predictors,
# keeping the best model of each size up to 18 variables. best_out has to be
# created here before it can be summarised or queried below.
best_out <- regsubsets(Salary ~ ., data = Hitters_Fixed, nvmax = 18)
summary(best_out)
Subset selection object
Call: regsubsets.formula(Salary ~ ., data = Hitters_Fixed, nvmax = 18)
19 Variables  (and intercept)
           Forced in Forced out
AtBat          FALSE      FALSE
Hits           FALSE      FALSE
HmRun          FALSE      FALSE
Runs           FALSE      FALSE
RBI            FALSE      FALSE
Walks          FALSE      FALSE
Years          FALSE      FALSE
CAtBat         FALSE      FALSE
CHits          FALSE      FALSE
CHmRun         FALSE      FALSE
CRuns          FALSE      FALSE
CRBI           FALSE      FALSE
CWalks         FALSE      FALSE
LeagueN        FALSE      FALSE
DivisionW      FALSE      FALSE
PutOuts        FALSE      FALSE
Assists        FALSE      FALSE
Errors         FALSE      FALSE
NewLeagueN     FALSE      FALSE
1 subsets of each size up to 18
Selection Algorithm: exhaustive
          AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
1  ( 1 )  " "   " "  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     " "       " "     " "     " "    " "       
2  ( 1 )  " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     " "       " "     " "     " "    " "       
3  ( 1 )  " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     " "       "*"     " "     " "    " "       
4  ( 1 )  " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     "*"       "*"     " "     " "    " "       
5  ( 1 )  "*"   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     "*"       "*"     " "     " "    " "       
6  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   " "    " "   " "    " "   "*"  " "    " "     "*"       "*"     " "     " "    " "       
7  ( 1 )  " "   "*"  " "   " "  " " "*"   " "   "*"    "*"   "*"    " "   " "  " "    " "     "*"       "*"     " "     " "    " "       
8  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   " "    " "   "*"    "*"   " "  "*"    " "     "*"       "*"     " "     " "    " "       
9  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    " "     "*"       "*"     " "     " "    " "       
10  ( 1 ) "*"   "*"  " "   " "  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    " "     "*"       "*"     "*"     " "    " "       
11  ( 1 ) "*"   "*"  " "   " "  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     " "    " "       
12  ( 1 ) "*"   "*"  " "   "*"  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     " "    " "       
13  ( 1 ) "*"   "*"  " "   "*"  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
14  ( 1 ) "*"   "*"  "*"   "*"  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
15  ( 1 ) "*"   "*"  "*"   "*"  " " "*"   " "   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
16  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   " "   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
17  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   " "   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    "*"       
18  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   "*"   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    "*"       
# Adjusted R-squared of the best model of each size, rounded to 3 decimals.
round(summary(best_out)[["adjr2"]], digits = 3)
 [1] 0.319 0.421 0.445 0.467 0.481 0.497 0.501 0.514 0.518 0.522 0.523 0.522 0.521 0.520 0.518 0.516 0.514 0.513
# Simple regression of Salary on PutOuts alone, with its full model summary.
summary(lm(formula = Salary ~ PutOuts, data = Hitters_Fixed))

Call:
lm(formula = Salary ~ PutOuts, data = Hitters_Fixed)

Residuals:
    Min      1Q  Median      3Q     Max 
-893.66 -314.08  -71.43  204.19 1857.55 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 395.15532   38.36164   10.30  < 2e-16 ***
PutOuts       0.48423    0.09514    5.09 6.87e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 431.1 on 261 degrees of freedom
Multiple R-squared:  0.09029,   Adjusted R-squared:  0.0868 
F-statistic:  25.9 on 1 and 261 DF,  p-value: 6.871e-07
# Coefficients of the best 18-variable model found by the subset search.
coef(best_out, id = 18)
 (Intercept)        AtBat         Hits        HmRun         Runs          RBI        Walks        Years       CAtBat        CHits        CRuns         CRBI 
 163.0837964   -1.9793878    7.4449895    4.0330423   -2.2712697   -0.9623673    6.2054965   -3.4272056   -0.1746122    0.1835880    1.4015966    0.7386996 
      CWalks      LeagueN    DivisionW      PutOuts      Assists       Errors   NewLeagueN 
  -0.8017228   63.1230544 -116.8591659    0.2822423    0.3731875   -3.3891302  -25.3135587 
# R-squared of the best 18-variable model.
summary(best_out)[["rsq"]][18]
[1] 0.5460945
LS0tDQp0aXRsZTogIkFzc2lnbm1lbnQgNCBSIE5vdGVib29rIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJJU0xSIikNCmBgYA0KYGBge3J9DQpsaWJyYXJ5KElTTFIpDQpgYGANCg0KDQpgYGB7cn0NCnN0cihIaXR0ZXJzKQ0KYGBgDQpgYGB7cn0NCnN0cihIaXR0ZXJzJFNhbGFyeSkNCmBgYA0KYGBge3J9DQpzdHIoSGl0dGVycyRIaXRzKQ0KYGBgDQoNCg0KYGBge3J9DQpIaXR0ZXJzX0ZpeGVkID1uYS5vbWl0KEhpdHRlcnMpDQpgYGANCg0KYGBge3J9DQpyZWdfb3V0IDwtIGxtKFNhbGFyeX5IaXRzLCBkYXRhID0gSGl0dGVyc19GaXhlZCkNCmBgYA0KDQpgYGB7cn0NCnJlZ19vdXQNCmBgYA0KDQpgYGB7cn0NCnN1bW1hcnkocmVnX291dCkNCmBgYA0KYGBge3J9DQpoaXN0KEhpdHRlcnNfRml4ZWQkU2FsYXJ5KQ0KYGBgDQoNCmBgYHtyfQ0KaGlzdChIaXR0ZXJzX0ZpeGVkJEhpdHMpDQpgYGANCmBgYHtyfQ0KcGxvdChTYWxhcnkgfiBIaXRzLCBkYXRhID0gSGl0dGVyc19GaXhlZCApDQpgYGANCiMgRGl2aWRlcyB0aGUgUGxvdHMgd2luZG93cyBpbnRvIHRoZSBudW1iZXIgb2Ygcm93cyBhbmQgY29sdW1ucyBzcGVjaWZpZWQgaW4gdGhlIGJyYWNrZXRzLg0KIyBwYXIobWZyb3c9YygyLDIpKSBkaXZpZGVzIGl0IHVwIGludG8gdHdvIHJvd3MgYW5kIHR3byBjb2x1bW5zDQpgYGB7cn0NCnBhcihtZnJvdz1jKDIsMikpDQpgYGANCg0KYGBge3J9DQpwbG90KFNhbGFyeSB+IEhpdHMsIGRhdGEgPSBIaXR0ZXJzX0ZpeGVkICkNCmBgYA0KDQpgYGB7cn0NCnBhcihtZnJvdz1jKDEsMSkpDQpgYGANCg0KYGBge3J9DQpwbG90KFNhbGFyeSB+IEhpdHMsIGRhdGEgPSBIaXR0ZXJzX0ZpeGVkICkNCmBgYA0KIyBQbG90IHRoZSBkYXRhIHBvaW50cyBvbiBhIGdyYXBoDQojIHggPSBpbmRlcGVuZGVudCB2YXJpYWJsZQ0KIyB5ID0gZGVwZW5kZW50IHZhcmlhYmxlDQpgYGB7cn0NCnNhbGFyeS5ncmFwaDwtZ2dwbG90KEhpdHRlcnNfRml4ZWQsIGFlcyh4PVNhbGFyeSwgeT1IaXRzKSkgKyBnZW9tX3BvaW50KCkNCnNhbGFyeS5ncmFwaA0KYGBgDQoNCiMgQWRkIHRoZSBsaW5lYXIgcmVncmVzc2lvbiBsaW5lIHRvIHRoZSBwbG90dGVkIGRhdGENCiMgQWRkIHRoZSByZWdyZXNzaW9uIGxpbmUgdXNpbmcgZ2VvbV9zbW9vdGgoKSBhbmQgdHlwaW5nIGluIGxtIGFzIHlvdXIgbWV0aG9kIGZvciBjcmVhdGluZyB0aGUgbGluZS4gDQojIFRoaXMgd2lsbCBhZGQgdGhlIGxpbmUgb2YgdGhlIGxpbmVhciByZWdyZXNzaW9uIGFzIHdlbGwgYXMgdGhlIHN0YW5kYXJkIGVycm9yIG9mIA0KIyB0aGUgZXN0aW1hdGUgKGluIHRoaXMgY2FzZSArLy0gMC4wMSkgYXMgYSBsaWdodCBncmV5IHN0cmlwZSBzdXJyb3VuZGluZyB0aGUgQmx1ZSBsaW5lOg0KDQpgYGB7cn0NCg0Kc2FsYXJ5LmdyYXBoIDwtIHNhbGFyeS5ncmFwaCArIGdlb21fc21vb3RoKG1ldGhvZD0ibG0iLCBjb2w9ImJsdWUiKQ0KDQpzYWxhcnku
Z3JhcGgNCmBgYA0KDQpgYGB7cn0NCnN0cihIaXR0ZXJzX0ZpeGVkKQ0KYGBgDQpgYGB7cn0NCm1yX291dCA8LSBsbShTYWxhcnl+LiwgSGl0dGVyc19GaXhlZCkNCnN1bW1hcnkobXJfb3V0KQ0KYGBgDQpgYGB7cn0NCmJlc3Rfb3V0IDwtIHJlZ3N1YnNldHMoU2FsYXJ5fi4sZGF0YSA9IEhpdHRlcnNfRml4ZWQsIG52bWF4ID0gMTgpDQpzdW1tYXJ5KGJlc3Rfb3V0KQ0KYGBgDQoNCg0KYGBge3J9DQpyb3VuZChzdW1tYXJ5KGJlc3Rfb3V0KSRhZGpyMiwzKQ0KYGBgDQpgYGB7cn0NCnN1bW1hcnkobG0oU2FsYXJ5flB1dE91dHMsZGF0YT1IaXR0ZXJzX0ZpeGVkKSkNCmBgYA0KYGBge3J9DQpjb2VmKChiZXN0X291dCksMTgpDQpgYGANCg0KYGBge3J9DQpzdW1tYXJ5KGJlc3Rfb3V0KSRyc3FbMThdDQpgYGANCg0K