# Read the ISLR Hitters CSV from the Rdatasets site into a data frame,
# then preview its first rows.
df <- read.csv(
  "https://vincentarelbundock.github.io/Rdatasets/csv/ISLR/Hitters.csv"
)
head(df)
##                   X AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun
## 1    -Andy Allanson   293   66     1   30  29    14     1    293    66      1
## 2       -Alan Ashby   315   81     7   24  38    39    14   3449   835     69
## 3      -Alvin Davis   479  130    18   66  72    76     3   1624   457     63
## 4     -Andre Dawson   496  141    20   65  78    37    11   5628  1575    225
## 5 -Andres Galarraga   321   87    10   39  42    30     2    396   101     12
## 6  -Alfredo Griffin   594  169     4   74  51    35    11   4408  1133     19
##   CRuns CRBI CWalks League Division PutOuts Assists Errors Salary NewLeague
## 1    30   29     14      A        E     446      33     20     NA         A
## 2   321  414    375      N        W     632      43     10  475.0         N
## 3   224  266    263      A        W     880      82     14  480.0         A
## 4   828  838    354      N        E     200      11      3  500.0         N
## 5    48   46     33      N        E     805      40      4   91.5         N
## 6   501  336    194      A        W     282     421     25  750.0         A
# Show the data frame's dimensions plus each column's type and first values.
str(df)
## 'data.frame':    322 obs. of  21 variables:
##  $ X        : chr  "-Andy Allanson" "-Alan Ashby" "-Alvin Davis" "-Andre Dawson" ...
##  $ AtBat    : int  293 315 479 496 321 594 185 298 323 401 ...
##  $ Hits     : int  66 81 130 141 87 169 37 73 81 92 ...
##  $ HmRun    : int  1 7 18 20 10 4 1 0 6 17 ...
##  $ Runs     : int  30 24 66 65 39 74 23 24 26 49 ...
##  $ RBI      : int  29 38 72 78 42 51 8 24 32 66 ...
##  $ Walks    : int  14 39 76 37 30 35 21 7 8 65 ...
##  $ Years    : int  1 14 3 11 2 11 2 3 2 13 ...
##  $ CAtBat   : int  293 3449 1624 5628 396 4408 214 509 341 5206 ...
##  $ CHits    : int  66 835 457 1575 101 1133 42 108 86 1332 ...
##  $ CHmRun   : int  1 69 63 225 12 19 1 0 6 253 ...
##  $ CRuns    : int  30 321 224 828 48 501 30 41 32 784 ...
##  $ CRBI     : int  29 414 266 838 46 336 9 37 34 890 ...
##  $ CWalks   : int  14 375 263 354 33 194 24 12 8 866 ...
##  $ League   : chr  "A" "N" "A" "N" ...
##  $ Division : chr  "E" "W" "W" "E" ...
##  $ PutOuts  : int  446 632 880 200 805 282 76 121 143 0 ...
##  $ Assists  : int  33 43 82 11 40 421 127 283 290 0 ...
##  $ Errors   : int  20 10 14 3 4 25 7 9 19 0 ...
##  $ Salary   : num  NA 475 480 500 91.5 750 70 100 75 1100 ...
##  $ NewLeague: chr  "A" "N" "A" "N" ...

The data set has 322 rows and 21 columns (variables).

# Per-column summary statistics; numeric columns with missing values also
# report their NA count.
summary(df)
##       X                 AtBat            Hits         HmRun      
##  Length:322         Min.   : 16.0   Min.   :  1   Min.   : 0.00  
##  Class :character   1st Qu.:255.2   1st Qu.: 64   1st Qu.: 4.00  
##  Mode  :character   Median :379.5   Median : 96   Median : 8.00  
##                     Mean   :380.9   Mean   :101   Mean   :10.77  
##                     3rd Qu.:512.0   3rd Qu.:137   3rd Qu.:16.00  
##                     Max.   :687.0   Max.   :238   Max.   :40.00  
##                                                                  
##       Runs             RBI             Walks            Years       
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   : 1.000  
##  1st Qu.: 30.25   1st Qu.: 28.00   1st Qu.: 22.00   1st Qu.: 4.000  
##  Median : 48.00   Median : 44.00   Median : 35.00   Median : 6.000  
##  Mean   : 50.91   Mean   : 48.03   Mean   : 38.74   Mean   : 7.444  
##  3rd Qu.: 69.00   3rd Qu.: 64.75   3rd Qu.: 53.00   3rd Qu.:11.000  
##  Max.   :130.00   Max.   :121.00   Max.   :105.00   Max.   :24.000  
##                                                                     
##      CAtBat            CHits            CHmRun           CRuns       
##  Min.   :   19.0   Min.   :   4.0   Min.   :  0.00   Min.   :   1.0  
##  1st Qu.:  816.8   1st Qu.: 209.0   1st Qu.: 14.00   1st Qu.: 100.2  
##  Median : 1928.0   Median : 508.0   Median : 37.50   Median : 247.0  
##  Mean   : 2648.7   Mean   : 717.6   Mean   : 69.49   Mean   : 358.8  
##  3rd Qu.: 3924.2   3rd Qu.:1059.2   3rd Qu.: 90.00   3rd Qu.: 526.2  
##  Max.   :14053.0   Max.   :4256.0   Max.   :548.00   Max.   :2165.0  
##                                                                      
##       CRBI             CWalks           League            Division        
##  Min.   :   0.00   Min.   :   0.00   Length:322         Length:322        
##  1st Qu.:  88.75   1st Qu.:  67.25   Class :character   Class :character  
##  Median : 220.50   Median : 170.50   Mode  :character   Mode  :character  
##  Mean   : 330.12   Mean   : 260.24                                        
##  3rd Qu.: 426.25   3rd Qu.: 339.25                                        
##  Max.   :1659.00   Max.   :1566.00                                        
##                                                                           
##     PutOuts          Assists          Errors          Salary      
##  Min.   :   0.0   Min.   :  0.0   Min.   : 0.00   Min.   :  67.5  
##  1st Qu.: 109.2   1st Qu.:  7.0   1st Qu.: 3.00   1st Qu.: 190.0  
##  Median : 212.0   Median : 39.5   Median : 6.00   Median : 425.0  
##  Mean   : 288.9   Mean   :106.9   Mean   : 8.04   Mean   : 535.9  
##  3rd Qu.: 325.0   3rd Qu.:166.0   3rd Qu.:11.00   3rd Qu.: 750.0  
##  Max.   :1378.0   Max.   :492.0   Max.   :32.00   Max.   :2460.0  
##                                                   NA's   :59      
##   NewLeague        
##  Length:322        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
# Replace the original column names, in column order, with descriptive ones
# that state whether a figure covers the 1986 season, the 1987 season, or the
# player's whole career. The vector must list one name per column.
names(df) <- c(
  "Player_Names",
  "At_Bats_1986", "Hits_1986", "HRs_1986", "Rs_1986", "RBIs_1986", "BBs_1986",
  "Years_In_MLB",
  "ABs_Career", "Hits_Career", "HRs_Career", "Rs_Career", "RBIs_Career",
  "BBs_Career",
  "League_1986", "Division_1986",
  "PutOuts_1986", "Assists_1986", "Errors_1986",
  "Salary_1987", "League_1987"
)
summary(df)
##  Player_Names        At_Bats_1986     Hits_1986      HRs_1986    
##  Length:322         Min.   : 16.0   Min.   :  1   Min.   : 0.00  
##  Class :character   1st Qu.:255.2   1st Qu.: 64   1st Qu.: 4.00  
##  Mode  :character   Median :379.5   Median : 96   Median : 8.00  
##                     Mean   :380.9   Mean   :101   Mean   :10.77  
##                     3rd Qu.:512.0   3rd Qu.:137   3rd Qu.:16.00  
##                     Max.   :687.0   Max.   :238   Max.   :40.00  
##                                                                  
##     Rs_1986         RBIs_1986         BBs_1986       Years_In_MLB   
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   : 1.000  
##  1st Qu.: 30.25   1st Qu.: 28.00   1st Qu.: 22.00   1st Qu.: 4.000  
##  Median : 48.00   Median : 44.00   Median : 35.00   Median : 6.000  
##  Mean   : 50.91   Mean   : 48.03   Mean   : 38.74   Mean   : 7.444  
##  3rd Qu.: 69.00   3rd Qu.: 64.75   3rd Qu.: 53.00   3rd Qu.:11.000  
##  Max.   :130.00   Max.   :121.00   Max.   :105.00   Max.   :24.000  
##                                                                     
##    ABs_Career       Hits_Career       HRs_Career       Rs_Career     
##  Min.   :   19.0   Min.   :   4.0   Min.   :  0.00   Min.   :   1.0  
##  1st Qu.:  816.8   1st Qu.: 209.0   1st Qu.: 14.00   1st Qu.: 100.2  
##  Median : 1928.0   Median : 508.0   Median : 37.50   Median : 247.0  
##  Mean   : 2648.7   Mean   : 717.6   Mean   : 69.49   Mean   : 358.8  
##  3rd Qu.: 3924.2   3rd Qu.:1059.2   3rd Qu.: 90.00   3rd Qu.: 526.2  
##  Max.   :14053.0   Max.   :4256.0   Max.   :548.00   Max.   :2165.0  
##                                                                      
##   RBIs_Career        BBs_Career      League_1986        Division_1986     
##  Min.   :   0.00   Min.   :   0.00   Length:322         Length:322        
##  1st Qu.:  88.75   1st Qu.:  67.25   Class :character   Class :character  
##  Median : 220.50   Median : 170.50   Mode  :character   Mode  :character  
##  Mean   : 330.12   Mean   : 260.24                                        
##  3rd Qu.: 426.25   3rd Qu.: 339.25                                        
##  Max.   :1659.00   Max.   :1566.00                                        
##                                                                           
##   PutOuts_1986     Assists_1986    Errors_1986     Salary_1987    
##  Min.   :   0.0   Min.   :  0.0   Min.   : 0.00   Min.   :  67.5  
##  1st Qu.: 109.2   1st Qu.:  7.0   1st Qu.: 3.00   1st Qu.: 190.0  
##  Median : 212.0   Median : 39.5   Median : 6.00   Median : 425.0  
##  Mean   : 288.9   Mean   :106.9   Mean   : 8.04   Mean   : 535.9  
##  3rd Qu.: 325.0   3rd Qu.:166.0   3rd Qu.:11.00   3rd Qu.: 750.0  
##  Max.   :1378.0   Max.   :492.0   Max.   :32.00   Max.   :2460.0  
##                                                   NA's   :59      
##  League_1987       
##  Length:322        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 

The column names were changed to be more reader-friendly. It is now easier to tell whether a variable refers to a player’s single season or to his whole career.

# Keep only players with more than 10 years in the majors. which() drops any
# row whose comparison is NA, matching what subset() does.
veterans <- df[which(df$Years_In_MLB > 10), ]
head(veterans)
##        Player_Names At_Bats_1986 Hits_1986 HRs_1986 Rs_1986 RBIs_1986 BBs_1986
## 2       -Alan Ashby          315        81        7      24        38       39
## 4     -Andre Dawson          496       141       20      65        78       37
## 6  -Alfredo Griffin          594       169        4      74        51       35
## 10  -Andre Thornton          401        92       17      49        66       65
## 15      -Bill Almon          196        43        7      29        27       30
## 17      -Buddy Bell          568       158       20      89        75       73
##    Years_In_MLB ABs_Career Hits_Career HRs_Career Rs_Career RBIs_Career
## 2            14       3449         835         69       321         414
## 4            11       5628        1575        225       828         838
## 6            11       4408        1133         19       501         336
## 10           13       5206        1332        253       784         890
## 15           13       3231         825         36       376         290
## 17           15       8068        2273        177      1045         993
##    BBs_Career League_1986 Division_1986 PutOuts_1986 Assists_1986 Errors_1986
## 2         375           N             W          632           43          10
## 4         354           N             E          200           11           3
## 6         194           A             W          282          421          25
## 10        866           A             E            0            0           0
## 15        238           N             E           80           45           8
## 17        732           N             W          105          290          10
##    Salary_1987 League_1987
## 2          475           N
## 4          500           N
## 6          750           A
## 10        1100           A
## 15         240           N
## 17         775           N

This subset was created by selecting players who had played more than 10 years in the major leagues. Players who reach 10 years of major-league service time are statistical superheroes! Fewer than 10% of players in baseball history have played for a decade or more. For our purposes, the players in this subset will be considered “veterans”.

# Keep only players with more than 150 hits in 1986. which() drops any row
# whose comparison is NA, matching what subset() does.
goodplayers <- df[which(df$Hits_1986 > 150), ]
head(goodplayers)
##        Player_Names At_Bats_1986 Hits_1986 HRs_1986 Rs_1986 RBIs_1986 BBs_1986
## 6  -Alfredo Griffin          594       169        4      74        51       35
## 11   -Alan Trammell          574       159       21     107        75       59
## 17      -Buddy Bell          568       158       20      89        75       73
## 25    -Bill Buckner          629       168       18      73       102       40
## 26    -Brett Butler          587       163        4      92        51       70
## 29      -Bill Doran          550       152        6      92        37       81
##    Years_In_MLB ABs_Career Hits_Career HRs_Career Rs_Career RBIs_Career
## 6            11       4408        1133         19       501         336
## 11           10       4631        1300         90       702         504
## 17           15       8068        2273        177      1045         993
## 25           18       8424        2464        164      1008        1072
## 26            6       2695         747         17       442         198
## 29            5       2308         633         32       349         182
##    BBs_Career League_1986 Division_1986 PutOuts_1986 Assists_1986 Errors_1986
## 6         194           A             W          282          421          25
## 11        488           A             E          238          445          22
## 17        732           N             W          105          290          10
## 25        402           A             E         1067          157          14
## 26        317           A             E          434            9           3
## 29        308           N             W          262          329          16
##    Salary_1987 League_1987
## 6      750.000           A
## 11     517.143           A
## 17     775.000           N
## 25     776.667           A
## 26     765.000           A
## 29     625.000           N

Another subset I wanted to look at was players who had more than 150 hits in the 1986 season. This subset showed me 52 of the best hitters in baseball that season.

# From `goodplayers`, keep those who also had more than 20 home runs and more
# than 100 RBIs in 1986, then print the whole result.
MVPs <- goodplayers[
  which(goodplayers$HRs_1986 > 20 & goodplayers$RBIs_1986 > 100),
]
MVPs
##        Player_Names At_Bats_1986 Hits_1986 HRs_1986 Rs_1986 RBIs_1986 BBs_1986
## 83   -Don Mattingly          677       238       31     117       113       53
## 87     -Dave Parker          637       174       31      89       116       56
## 109    -George Bell          641       198       31     101       108       41
## 114    -Glenn Davis          574       152       31      91       101       64
## 116    -Gary Gaetti          596       171       34      91       108       52
## 137 -Jesse Barfield          589       170       40     107       108       69
## 142     -Joe Carter          663       200       29     108       121       32
## 160    -Jim Presley          616       163       27      83       107       32
##     Years_In_MLB ABs_Career Hits_Career HRs_Career Rs_Career RBIs_Career
## 83             5       2223         737         93       349         401
## 87            14       6727        2024        247       978        1093
## 109            5       2129         610         92       297         319
## 114            3        985         260         53       148         173
## 116            6       2862         728        107       361         401
## 137            6       2325         634        128       371         376
## 142            4       1447         404         57       210         222
## 160            3       1437         377         65       181         227
##     BBs_Career League_1986 Division_1986 PutOuts_1986 Assists_1986 Errors_1986
## 83         171           A             E         1377          100           6
## 87         495           N             W          278            9           9
## 109        117           A             E          269           17          10
## 114         95           N             W         1253          111          11
## 116        224           A             W          118          334          21
## 137        238           A             E          368           20           3
## 142         68           A             E          241            8           6
## 160         82           A             W          110          308          15
##     Salary_1987 League_1987
## 83     1975.000           A
## 87     1041.667           N
## 109    1175.000           A
## 114     215.000           N
## 116     900.000           A
## 137    1237.500           A
## 142     250.000           A
## 160     200.000           A

After I looked at some of the best hitters in baseball that year, I wanted to look at the best of the best. I looked at players with more than 150 hits, more than 20 home runs, and more than 100 runs batted in. The eight players I found were some of the best players in baseball that year.

# Add each player's 1986 batting average (hits divided by at-bats), rounded
# to three decimal places, then preview the updated data frame.
df[["bAVG"]] <- with(df, round(Hits_1986 / At_Bats_1986, digits = 3))
head(df)
##        Player_Names At_Bats_1986 Hits_1986 HRs_1986 Rs_1986 RBIs_1986 BBs_1986
## 1    -Andy Allanson          293        66        1      30        29       14
## 2       -Alan Ashby          315        81        7      24        38       39
## 3      -Alvin Davis          479       130       18      66        72       76
## 4     -Andre Dawson          496       141       20      65        78       37
## 5 -Andres Galarraga          321        87       10      39        42       30
## 6  -Alfredo Griffin          594       169        4      74        51       35
##   Years_In_MLB ABs_Career Hits_Career HRs_Career Rs_Career RBIs_Career
## 1            1        293          66          1        30          29
## 2           14       3449         835         69       321         414
## 3            3       1624         457         63       224         266
## 4           11       5628        1575        225       828         838
## 5            2        396         101         12        48          46
## 6           11       4408        1133         19       501         336
##   BBs_Career League_1986 Division_1986 PutOuts_1986 Assists_1986 Errors_1986
## 1         14           A             E          446           33          20
## 2        375           N             W          632           43          10
## 3        263           A             W          880           82          14
## 4        354           N             E          200           11           3
## 5         33           N             E          805           40           4
## 6        194           A             W          282          421          25
##   Salary_1987 League_1987  bAVG
## 1          NA           A 0.225
## 2       475.0           N 0.257
## 3       480.0           A 0.271
## 4       500.0           N 0.284
## 5        91.5           N 0.271
## 6       750.0           A 0.285

By dividing each player’s hits by his at-bats, I was able to calculate each player’s batting average. This was a fun category to look at because it is an easy way to tell how a player did just by looking at one number.

# Histogram of 1986 hit totals across all players. The title and x-axis label
# are set explicitly because the defaults are built from the raw expression
# `df$Hits_1986`, which is not reader-friendly.
hist(
  df$Hits_1986,
  main = "Hits in the 1986 Season",
  xlab = "Hits 1986"
)

By looking at this histogram, I was able to see that most players had around 100 hits in 1986, while the really good ones (very few) had over 200 hits.

# Scatter plot of 1986 batting average (x) against 1987 salary (y), drawn
# with solid points (pch = 19).
with(
  df,
  plot(
    x = bAVG, y = Salary_1987,
    main = "Batting Average VS Salary Next Year",
    xlab = "Batting Average 1986", ylab = "Salary 1987",
    pch = 19
  )
)

I tried to find a correlation here between a player’s batting average and his salary the next year. There simply wasn’t much of a correlation, probably because a high batting average doesn’t necessarily mean that a player is going to get a big new contract the next year.