library(Lahman)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Start from the Lahman team-season table and keep only the columns used in
# the analysis: season, team name, team ID, wins (the dependent variable),
# strikeouts, runs, home runs, runs allowed, earned run average, and league ID
# (AL or NL).
teams_data <- Teams %>%
  select(yearID, name, teamID, W, SO, R, HR, RA, ERA, lgID)

# Keep the 2013 season onward for a more relevant analysis, and drop 2020
# because it was a shorter season.
recent_data <- teams_data %>%
  filter(yearID >= 2013, yearID != 2020)

Inspect the head of the data to get some insight into the columns selected.

# Show the first six rows of the filtered data
print(head(recent_data, n = 6L))
##   yearID                 name teamID  W   SO   R  HR  RA  ERA lgID
## 1   2013 Arizona Diamondbacks    ARI 81 1142 685 130 695 3.92   NL
## 2   2013       Atlanta Braves    ATL 96 1384 688 181 548 3.18   NL
## 3   2013    Baltimore Orioles    BAL 85 1125 745 212 709 4.20   AL
## 4   2013       Boston Red Sox    BOS 97 1308 853 178 656 3.79   AL
## 5   2013    Chicago White Sox    CHA 63 1207 598 148 723 3.98   AL
## 6   2013         Chicago Cubs    CHN 66 1230 602 172 689 4.00   NL

Inspect the tail to see we have data up to the 2022 season.

# Show the last six rows of the filtered data
print(tail(recent_data, n = 6L))
##     yearID                 name teamID  W   SO   R  HR  RA  ERA lgID
## 265   2022 San Francisco Giants    SFN 81 1462 716 183 697 3.85   NL
## 266   2022  St. Louis Cardinals    SLN 93 1226 772 197 637 3.79   NL
## 267   2022       Tampa Bay Rays    TBA 86 1395 666 139 614 3.41   AL
## 268   2022        Texas Rangers    TEX 68 1446 707 198 743 4.22   AL
## 269   2022    Toronto Blue Jays    TOR 92 1242 775 200 679 3.87   AL
## 270   2022 Washington Nationals    WAS 55 1221 603 136 855 5.00   NL

The dimensions show that the data contains 270 rows and 10 columns.

# Report the row and column counts of the filtered data
print(c(nrow(recent_data), ncol(recent_data)))
## [1] 270  10

Check the league ID column to see whether it can be used as the dichotomous (binary) term. It should have only two values, “AL” and “NL”. The output below confirms that this is the case: although the factor still carries seven historical league levels, only two values actually occur in the data, so the column can be used as the dichotomous term.

# Collect the distinct league IDs that occur in the filtered data
league_id_vals <- unique(recent_data[["lgID"]])

# Display the distinct values
print(league_id_vals)
## [1] NL AL
## Levels: AA AL FL NA NL PL UA
# Tally how many distinct league IDs were found
num_unique_values <- league_id_vals %>% length()
num_unique_values %>% print()
## [1] 2

Create the quadratic term by squaring the HR column.

# Add the quadratic home-run term (HR squared) as a new column, then print the
# full data frame so the new column can be inspected.
recent_data$HR_squared <- recent_data$HR^2
print(recent_data)
##     yearID                          name teamID   W   SO   R  HR  RA  ERA lgID
## 1     2013          Arizona Diamondbacks    ARI  81 1142 685 130 695 3.92   NL
## 2     2013                Atlanta Braves    ATL  96 1384 688 181 548 3.18   NL
## 3     2013             Baltimore Orioles    BAL  85 1125 745 212 709 4.20   AL
## 4     2013                Boston Red Sox    BOS  97 1308 853 178 656 3.79   AL
## 5     2013             Chicago White Sox    CHA  63 1207 598 148 723 3.98   AL
## 6     2013                  Chicago Cubs    CHN  66 1230 602 172 689 4.00   NL
## 7     2013               Cincinnati Reds    CIN  90 1245 698 155 589 3.38   NL
## 8     2013             Cleveland Indians    CLE  92 1283 745 171 662 3.82   AL
## 9     2013              Colorado Rockies    COL  74 1204 706 159 760 4.44   NL
## 10    2013                Detroit Tigers    DET  93 1073 796 176 624 3.61   AL
## 11    2013                Houston Astros    HOU  51 1535 610 148 848 4.79   AL
## 12    2013            Kansas City Royals    KCA  86 1048 648 112 601 3.45   AL
## 13    2013 Los Angeles Angels of Anaheim    LAA  78 1221 733 164 737 4.23   AL
## 14    2013           Los Angeles Dodgers    LAN  92 1146 649 138 582 3.25   NL
## 15    2013                 Miami Marlins    MIA  62 1232 513  95 646 3.71   NL
## 16    2013             Milwaukee Brewers    MIL  74 1183 640 157 687 3.84   NL
## 17    2013               Minnesota Twins    MIN  66 1430 614 151 788 4.55   AL
## 18    2013              New York Yankees    NYA  85 1214 650 144 671 3.94   AL
## 19    2013                 New York Mets    NYN  74 1384 619 130 684 3.77   NL
## 20    2013             Oakland Athletics    OAK  96 1178 767 186 625 3.56   AL
## 21    2013         Philadelphia Phillies    PHI  73 1205 610 140 749 4.32   NL
## 22    2013            Pittsburgh Pirates    PIT  94 1330 634 161 577 3.26   NL
## 23    2013              San Diego Padres    SDN  76 1309 618 146 700 3.98   NL
## 24    2013              Seattle Mariners    SEA  71 1353 624 188 754 4.31   AL
## 25    2013          San Francisco Giants    SFN  76 1078 629 107 691 4.00   NL
## 26    2013           St. Louis Cardinals    SLN  97 1110 783 125 596 3.42   NL
## 27    2013                Tampa Bay Rays    TBA  92 1171 700 165 646 3.74   AL
## 28    2013                 Texas Rangers    TEX  91 1067 730 176 636 3.62   AL
## 29    2013             Toronto Blue Jays    TOR  74 1123 712 185 756 4.25   AL
## 30    2013          Washington Nationals    WAS  86 1192 656 161 626 3.59   NL
## 31    2014          Arizona Diamondbacks    ARI  64 1165 615 118 742 4.26   NL
## 32    2014                Atlanta Braves    ATL  79 1369 573 123 597 3.38   NL
## 33    2014             Baltimore Orioles    BAL  96 1285 705 211 593 3.43   AL
## 34    2014                Boston Red Sox    BOS  71 1337 634 123 715 4.01   AL
## 35    2014             Chicago White Sox    CHA  73 1362 660 155 758 4.29   AL
## 36    2014                  Chicago Cubs    CHN  73 1477 614 157 707 3.91   NL
## 37    2014               Cincinnati Reds    CIN  76 1252 595 131 612 3.59   NL
## 38    2014             Cleveland Indians    CLE  85 1189 669 142 653 3.56   AL
## 39    2014              Colorado Rockies    COL  66 1281 755 186 818 4.84   NL
## 40    2014                Detroit Tigers    DET  90 1144 757 155 705 4.01   AL
## 41    2014                Houston Astros    HOU  70 1442 629 163 723 4.11   AL
## 42    2014            Kansas City Royals    KCA  89  985 651  95 624 3.51   AL
## 43    2014 Los Angeles Angels of Anaheim    LAA  98 1266 773 155 630 3.58   AL
## 44    2014           Los Angeles Dodgers    LAN  94 1246 718 134 617 3.40   NL
## 45    2014                 Miami Marlins    MIA  77 1419 645 122 674 3.78   NL
## 46    2014             Milwaukee Brewers    MIL  82 1197 650 150 657 3.67   NL
## 47    2014               Minnesota Twins    MIN  70 1329 715 128 777 4.57   AL
## 48    2014              New York Yankees    NYA  84 1133 633 147 664 3.75   AL
## 49    2014                 New York Mets    NYN  79 1264 629 125 618 3.49   NL
## 50    2014             Oakland Athletics    OAK  88 1104 729 146 572 3.22   AL
## 51    2014         Philadelphia Phillies    PHI  73 1306 619 125 687 3.79   NL
## 52    2014            Pittsburgh Pirates    PIT  88 1244 682 156 631 3.47   NL
## 53    2014              San Diego Padres    SDN  77 1294 535 109 577 3.27   NL
## 54    2014              Seattle Mariners    SEA  87 1232 634 136 554 3.17   AL
## 55    2014          San Francisco Giants    SFN  88 1245 665 132 614 3.50   NL
## 56    2014           St. Louis Cardinals    SLN  90 1133 619 105 603 3.50   NL
## 57    2014                Tampa Bay Rays    TBA  77 1124 612 117 625 3.56   AL
## 58    2014                 Texas Rangers    TEX  67 1162 637 111 773 4.49   AL
## 59    2014             Toronto Blue Jays    TOR  83 1151 723 177 686 4.00   AL
## 60    2014          Washington Nationals    WAS  96 1304 686 152 555 3.03   NL
## 61    2015          Arizona Diamondbacks    ARI  79 1312 720 154 713 4.04   NL
## 62    2015                Atlanta Braves    ATL  67 1107 573 100 760 4.41   NL
## 63    2015             Baltimore Orioles    BAL  81 1331 713 217 693 4.05   AL
## 64    2015                Boston Red Sox    BOS  78 1148 748 161 753 4.31   AL
## 65    2015             Chicago White Sox    CHA  76 1231 622 136 701 3.98   AL
## 66    2015                  Chicago Cubs    CHN  97 1518 689 171 608 3.36   NL
## 67    2015               Cincinnati Reds    CIN  64 1255 640 167 754 4.33   NL
## 68    2015             Cleveland Indians    CLE  81 1157 669 141 640 3.67   AL
## 69    2015              Colorado Rockies    COL  68 1283 737 186 844 5.04   NL
## 70    2015                Detroit Tigers    DET  74 1259 689 151 803 4.64   AL
## 71    2015                Houston Astros    HOU  86 1392 729 230 618 3.57   AL
## 72    2015            Kansas City Royals    KCA  95  973 724 139 641 3.73   AL
## 73    2015 Los Angeles Angels of Anaheim    LAA  85 1150 661 176 675 3.94   AL
## 74    2015           Los Angeles Dodgers    LAN  92 1258 667 187 595 3.44   NL
## 75    2015                 Miami Marlins    MIA  71 1150 613 120 678 4.02   NL
## 76    2015             Milwaukee Brewers    MIL  68 1299 655 145 737 4.28   NL
## 77    2015               Minnesota Twins    MIN  83 1264 696 156 700 4.07   AL
## 78    2015              New York Yankees    NYA  87 1227 764 212 698 4.03   AL
## 79    2015                 New York Mets    NYN  90 1290 683 177 613 3.43   NL
## 80    2015             Oakland Athletics    OAK  68 1119 694 146 729 4.14   AL
## 81    2015         Philadelphia Phillies    PHI  63 1274 626 130 809 4.69   NL
## 82    2015            Pittsburgh Pirates    PIT  98 1322 697 140 596 3.21   NL
## 83    2015              San Diego Padres    SDN  74 1327 650 148 731 4.09   NL
## 84    2015              Seattle Mariners    SEA  76 1336 656 198 726 4.16   AL
## 85    2015          San Francisco Giants    SFN  84 1159 696 136 627 3.72   NL
## 86    2015           St. Louis Cardinals    SLN 100 1267 647 137 525 2.94   NL
## 87    2015                Tampa Bay Rays    TBA  80 1310 644 167 642 3.74   AL
## 88    2015                 Texas Rangers    TEX  88 1233 751 172 733 4.24   AL
## 89    2015             Toronto Blue Jays    TOR  93 1151 891 232 670 3.80   AL
## 90    2015          Washington Nationals    WAS  83 1344 703 177 635 3.62   NL
## 91    2016          Arizona Diamondbacks    ARI  69 1427 752 190 890 5.09   NL
## 92    2016                Atlanta Braves    ATL  68 1240 649 122 779 4.51   NL
## 93    2016             Baltimore Orioles    BAL  89 1324 744 253 715 4.22   AL
## 94    2016                Boston Red Sox    BOS  93 1160 878 208 694 4.00   AL
## 95    2016             Chicago White Sox    CHA  78 1285 686 168 715 4.10   AL
## 96    2016                  Chicago Cubs    CHN 103 1339 808 199 556 3.15   NL
## 97    2016               Cincinnati Reds    CIN  68 1284 716 164 854 4.91   NL
## 98    2016             Cleveland Indians    CLE  94 1246 777 185 676 3.84   AL
## 99    2016              Colorado Rockies    COL  75 1330 845 204 860 4.91   NL
## 100   2016                Detroit Tigers    DET  86 1303 750 211 721 4.24   AL
## 101   2016                Houston Astros    HOU  84 1452 724 198 701 4.06   AL
## 102   2016            Kansas City Royals    KCA  81 1224 675 147 712 4.21   AL
## 103   2016 Los Angeles Angels of Anaheim    LAA  74  991 717 156 727 4.28   AL
## 104   2016           Los Angeles Dodgers    LAN  91 1321 725 189 638 3.70   NL
## 105   2016                 Miami Marlins    MIA  79 1213 655 128 682 4.05   NL
## 106   2016             Milwaukee Brewers    MIL  73 1543 671 194 733 4.08   NL
## 107   2016               Minnesota Twins    MIN  59 1426 722 200 889 5.08   AL
## 108   2016              New York Yankees    NYA  84 1188 680 183 702 4.16   AL
## 109   2016                 New York Mets    NYN  87 1302 671 218 617 3.57   NL
## 110   2016             Oakland Athletics    OAK  69 1145 653 169 761 4.51   AL
## 111   2016         Philadelphia Phillies    PHI  71 1376 610 161 796 4.63   NL
## 112   2016            Pittsburgh Pirates    PIT  78 1334 729 153 758 4.21   NL
## 113   2016              San Diego Padres    SDN  68 1500 686 177 770 4.43   NL
## 114   2016              Seattle Mariners    SEA  86 1288 768 223 707 4.00   AL
## 115   2016          San Francisco Giants    SFN  87 1107 715 130 631 3.65   NL
## 116   2016           St. Louis Cardinals    SLN  86 1318 779 225 712 4.08   NL
## 117   2016                Tampa Bay Rays    TBA  68 1482 672 216 713 4.20   AL
## 118   2016                 Texas Rangers    TEX  95 1220 765 215 757 4.37   AL
## 119   2016             Toronto Blue Jays    TOR  89 1362 759 221 666 3.78   AL
## 120   2016          Washington Nationals    WAS  95 1252 763 203 612 3.51   NL
## 121   2017          Arizona Diamondbacks    ARI  93 1456 812 220 659 3.66   NL
## 122   2017                Atlanta Braves    ATL  72 1184 732 165 821 4.72   NL
## 123   2017             Baltimore Orioles    BAL  75 1412 743 232 841 4.97   AL
## 124   2017                Boston Red Sox    BOS  93 1224 785 168 668 3.70   AL
## 125   2017             Chicago White Sox    CHA  67 1397 706 186 820 4.78   AL
## 126   2017                  Chicago Cubs    CHN  92 1401 822 223 695 3.95   NL
## 127   2017               Cincinnati Reds    CIN  68 1329 753 219 869 5.17   NL
## 128   2017             Cleveland Indians    CLE 102 1153 818 212 564 3.30   AL
## 129   2017              Colorado Rockies    COL  87 1408 824 192 757 4.51   NL
## 130   2017                Detroit Tigers    DET  64 1313 735 187 894 5.36   AL
## 131   2017                Houston Astros    HOU 101 1087 896 238 700 4.12   AL
## 132   2017            Kansas City Royals    KCA  80 1166 702 193 791 4.61   AL
## 133   2017 Los Angeles Angels of Anaheim    LAA  80 1198 710 186 709 4.20   AL
## 134   2017           Los Angeles Dodgers    LAN 104 1380 770 221 580 3.38   NL
## 135   2017                 Miami Marlins    MIA  77 1282 778 194 822 4.82   NL
## 136   2017             Milwaukee Brewers    MIL  86 1571 732 224 697 4.00   NL
## 137   2017               Minnesota Twins    MIN  85 1342 815 206 788 4.59   AL
## 138   2017              New York Yankees    NYA  91 1386 858 241 660 3.72   AL
## 139   2017                 New York Mets    NYN  70 1291 735 224 863 5.01   NL
## 140   2017             Oakland Athletics    OAK  75 1491 739 234 826 4.67   AL
## 141   2017         Philadelphia Phillies    PHI  66 1417 690 174 782 4.55   NL
## 142   2017            Pittsburgh Pirates    PIT  75 1213 668 151 731 4.22   NL
## 143   2017              San Diego Padres    SDN  71 1499 604 189 816 4.67   NL
## 144   2017              Seattle Mariners    SEA  78 1267 750 200 772 4.46   AL
## 145   2017          San Francisco Giants    SFN  64 1204 639 128 776 4.50   NL
## 146   2017           St. Louis Cardinals    SLN  83 1348 761 196 705 4.01   NL
## 147   2017                Tampa Bay Rays    TBA  80 1538 694 228 704 3.97   AL
## 148   2017                 Texas Rangers    TEX  78 1493 799 237 816 4.66   AL
## 149   2017             Toronto Blue Jays    TOR  76 1327 693 222 784 4.42   AL
## 150   2017          Washington Nationals    WAS  97 1327 819 215 672 3.88   NL
## 151   2018          Arizona Diamondbacks    ARI  82 1460 693 176 644 3.72   NL
## 152   2018                Atlanta Braves    ATL  90 1290 759 175 657 3.75   NL
## 153   2018             Baltimore Orioles    BAL  47 1412 622 188 892 5.18   AL
## 154   2018                Boston Red Sox    BOS 108 1253 876 208 647 3.75   AL
## 155   2018             Chicago White Sox    CHA  62 1594 656 182 848 4.83   AL
## 156   2018                  Chicago Cubs    CHN  95 1388 761 167 645 3.65   NL
## 157   2018               Cincinnati Reds    CIN  67 1376 696 172 819 4.63   NL
## 158   2018             Cleveland Indians    CLE  91 1189 818 216 648 3.77   AL
## 159   2018              Colorado Rockies    COL  91 1397 780 210 745 4.33   NL
## 160   2018                Detroit Tigers    DET  64 1341 630 135 796 4.58   AL
## 161   2018                Houston Astros    HOU 103 1197 797 205 534 3.11   AL
## 162   2018            Kansas City Royals    KCA  58 1310 638 155 833 4.94   AL
## 163   2018 Los Angeles Angels of Anaheim    LAA  80 1300 721 214 722 4.15   AL
## 164   2018           Los Angeles Dodgers    LAN  92 1436 804 235 610 3.38   NL
## 165   2018                 Miami Marlins    MIA  63 1384 589 128 809 4.76   NL
## 166   2018             Milwaukee Brewers    MIL  96 1458 754 218 659 3.73   NL
## 167   2018               Minnesota Twins    MIN  78 1328 738 166 775 4.50   AL
## 168   2018              New York Yankees    NYA 100 1421 851 267 669 3.78   AL
## 169   2018                 New York Mets    NYN  77 1404 676 170 707 4.07   NL
## 170   2018             Oakland Athletics    OAK  97 1381 813 227 674 3.81   AL
## 171   2018         Philadelphia Phillies    PHI  80 1520 677 186 728 4.14   NL
## 172   2018            Pittsburgh Pirates    PIT  82 1229 692 157 693 4.00   NL
## 173   2018              San Diego Padres    SDN  66 1523 617 162 767 4.40   NL
## 174   2018              Seattle Mariners    SEA  89 1221 677 176 711 4.13   AL
## 175   2018          San Francisco Giants    SFN  73 1467 603 133 699 3.95   NL
## 176   2018           St. Louis Cardinals    SLN  88 1380 759 205 691 3.85   NL
## 177   2018                Tampa Bay Rays    TBA  90 1388 716 150 646 3.74   AL
## 178   2018                 Texas Rangers    TEX  67 1484 737 194 848 4.92   AL
## 179   2018             Toronto Blue Jays    TOR  73 1387 709 217 832 4.85   AL
## 180   2018          Washington Nationals    WAS  82 1289 771 191 682 4.04   NL
## 181   2019          Arizona Diamondbacks    ARI  85 1360 813 220 743 4.25   NL
## 182   2019                Atlanta Braves    ATL  97 1467 855 249 743 4.19   NL
## 183   2019             Baltimore Orioles    BAL  54 1435 729 213 981 5.59   AL
## 184   2019                Boston Red Sox    BOS  84 1382 901 245 828 4.70   AL
## 185   2019             Chicago White Sox    CHA  72 1549 708 182 832 4.90   AL
## 186   2019                  Chicago Cubs    CHN  84 1460 814 256 717 4.10   NL
## 187   2019               Cincinnati Reds    CIN  75 1436 701 227 711 4.18   NL
## 188   2019             Cleveland Indians    CLE  93 1332 769 223 657 3.76   AL
## 189   2019              Colorado Rockies    COL  71 1503 835 224 958 5.56   NL
## 190   2019                Detroit Tigers    DET  47 1595 582 149 915 5.24   AL
## 191   2019                Houston Astros    HOU 107 1166 920 288 640 3.66   AL
## 192   2019            Kansas City Royals    KCA  59 1405 691 162 869 5.20   AL
## 193   2019 Los Angeles Angels of Anaheim    LAA  72 1276 769 220 868 5.12   AL
## 194   2019           Los Angeles Dodgers    LAN 106 1356 886 279 613 3.37   NL
## 195   2019                 Miami Marlins    MIA  57 1469 615 146 808 4.74   NL
## 196   2019             Milwaukee Brewers    MIL  89 1563 769 250 766 4.40   NL
## 197   2019               Minnesota Twins    MIN 101 1334 939 307 754 4.18   AL
## 198   2019              New York Yankees    NYA 103 1437 943 306 739 4.31   AL
## 199   2019                 New York Mets    NYN  86 1384 791 242 737 4.24   NL
## 200   2019             Oakland Athletics    OAK  97 1338 845 257 680 3.97   AL
## 201   2019         Philadelphia Phillies    PHI  81 1453 774 215 794 4.53   NL
## 202   2019            Pittsburgh Pirates    PIT  69 1213 758 163 911 5.18   NL
## 203   2019              San Diego Padres    SDN  70 1581 682 219 789 4.60   NL
## 204   2019              Seattle Mariners    SEA  68 1581 758 239 893 4.99   AL
## 205   2019          San Francisco Giants    SFN  77 1435 678 167 773 4.38   NL
## 206   2019           St. Louis Cardinals    SLN  91 1420 764 210 662 3.80   NL
## 207   2019                Tampa Bay Rays    TBA  96 1493 769 217 656 3.65   AL
## 208   2019                 Texas Rangers    TEX  78 1578 810 223 878 5.06   AL
## 209   2019             Toronto Blue Jays    TOR  67 1514 726 247 828 4.79   AL
## 210   2019          Washington Nationals    WAS  93 1308 873 231 724 4.27   NL
## 211   2021          Arizona Diamondbacks    ARI  52 1465 679 144 893 5.11   NL
## 212   2021                Atlanta Braves    ATL  88 1453 790 239 656 3.88   NL
## 213   2021             Baltimore Orioles    BAL  52 1454 659 195 956 5.84   AL
## 214   2021                Boston Red Sox    BOS  92 1386 829 219 749 4.26   AL
## 215   2021             Chicago White Sox    CHA  93 1389 796 190 636 3.73   AL
## 216   2021                  Chicago Cubs    CHN  71 1596 705 210 839 4.87   NL
## 217   2021               Cincinnati Reds    CIN  83 1425 786 222 760 4.40   NL
## 218   2021             Cleveland Indians    CLE  80 1387 717 203 727 4.34   AL
## 219   2021              Colorado Rockies    COL  74 1356 739 182 796 4.82   NL
## 220   2021                Detroit Tigers    DET  77 1514 697 179 756 4.32   AL
## 221   2021                Houston Astros    HOU  95 1222 863 221 658 3.78   AL
## 222   2021            Kansas City Royals    KCA  74 1258 686 163 788 4.64   AL
## 223   2021 Los Angeles Angels of Anaheim    LAA  77 1394 723 190 804 4.69   AL
## 224   2021           Los Angeles Dodgers    LAN 106 1408 830 237 561 3.01   NL
## 225   2021                 Miami Marlins    MIA  67 1553 623 158 701 3.96   NL
## 226   2021             Milwaukee Brewers    MIL  95 1465 738 194 623 3.50   NL
## 227   2021               Minnesota Twins    MIN  73 1405 729 228 834 4.83   AL
## 228   2021              New York Yankees    NYA  92 1482 711 222 669 3.74   AL
## 229   2021                 New York Mets    NYN  77 1392 636 176 668 3.90   NL
## 230   2021             Oakland Athletics    OAK  86 1349 743 199 687 4.02   AL
## 231   2021         Philadelphia Phillies    PHI  82 1402 734 198 745 4.39   NL
## 232   2021            Pittsburgh Pirates    PIT  61 1328 609 124 833 5.08   NL
## 233   2021              San Diego Padres    SDN  79 1324 729 180 708 4.10   NL
## 234   2021              Seattle Mariners    SEA  90 1492 697 199 748 4.30   AL
## 235   2021          San Francisco Giants    SFN 107 1461 804 241 594 3.24   NL
## 236   2021           St. Louis Cardinals    SLN  90 1341 706 198 672 3.98   NL
## 237   2021                Tampa Bay Rays    TBA 100 1542 857 222 651 3.67   AL
## 238   2021                 Texas Rangers    TEX  60 1381 625 167 815 4.79   AL
## 239   2021             Toronto Blue Jays    TOR  91 1218 846 262 663 3.91   AL
## 240   2021          Washington Nationals    WAS  65 1303 724 182 820 4.80   NL
## 241   2022          Arizona Diamondbacks    ARI  74 1341 702 173 740 4.25   NL
## 242   2022                Atlanta Braves    ATL 101 1498 789 243 609 3.46   NL
## 243   2022             Baltimore Orioles    BAL  83 1390 674 171 688 3.97   AL
## 244   2022                Boston Red Sox    BOS  78 1373 735 155 787 4.53   AL
## 245   2022             Chicago White Sox    CHA  81 1269 686 149 717 3.92   AL
## 246   2022                  Chicago Cubs    CHN  74 1448 657 159 731 4.00   NL
## 247   2022               Cincinnati Reds    CIN  62 1430 648 156 815 4.86   NL
## 248   2022           Cleveland Guardians    CLE  92 1122 698 127 634 3.46   AL
## 249   2022              Colorado Rockies    COL  68 1330 698 149 873 5.06   NL
## 250   2022                Detroit Tigers    DET  66 1413 557 110 713 4.04   AL
## 251   2022                Houston Astros    HOU 106 1179 737 214 518 2.90   AL
## 252   2022            Kansas City Royals    KCA  65 1287 640 138 810 4.70   AL
## 253   2022 Los Angeles Angels of Anaheim    LAA  73 1539 623 190 668 3.77   AL
## 254   2022           Los Angeles Dodgers    LAN 111 1374 847 212 513 2.80   NL
## 255   2022                 Miami Marlins    MIA  69 1429 586 144 676 3.86   NL
## 256   2022             Milwaukee Brewers    MIL  86 1464 725 219 688 3.83   NL
## 257   2022               Minnesota Twins    MIN  78 1353 696 178 684 3.98   AL
## 258   2022              New York Yankees    NYA  99 1391 807 254 567 3.30   AL
## 259   2022                 New York Mets    NYN 101 1217 772 171 606 3.57   NL
## 260   2022             Oakland Athletics    OAK  60 1389 568 137 770 4.52   AL
## 261   2022         Philadelphia Phillies    PHI  87 1363 747 205 685 3.97   NL
## 262   2022            Pittsburgh Pirates    PIT  62 1497 591 158 817 4.66   NL
## 263   2022              San Diego Padres    SDN  89 1327 705 153 660 3.81   NL
## 264   2022              Seattle Mariners    SEA  90 1397 690 197 623 3.59   AL
## 265   2022          San Francisco Giants    SFN  81 1462 716 183 697 3.85   NL
## 266   2022           St. Louis Cardinals    SLN  93 1226 772 197 637 3.79   NL
## 267   2022                Tampa Bay Rays    TBA  86 1395 666 139 614 3.41   AL
## 268   2022                 Texas Rangers    TEX  68 1446 707 198 743 4.22   AL
## 269   2022             Toronto Blue Jays    TOR  92 1242 775 200 679 3.87   AL
## 270   2022          Washington Nationals    WAS  55 1221 603 136 855 5.00   NL
##     HR_squared
## 1        16900
## 2        32761
## 3        44944
## 4        31684
## 5        21904
## 6        29584
## 7        24025
## 8        29241
## 9        25281
## 10       30976
## 11       21904
## 12       12544
## 13       26896
## 14       19044
## 15        9025
## 16       24649
## 17       22801
## 18       20736
## 19       16900
## 20       34596
## 21       19600
## 22       25921
## 23       21316
## 24       35344
## 25       11449
## 26       15625
## 27       27225
## 28       30976
## 29       34225
## 30       25921
## 31       13924
## 32       15129
## 33       44521
## 34       15129
## 35       24025
## 36       24649
## 37       17161
## 38       20164
## 39       34596
## 40       24025
## 41       26569
## 42        9025
## 43       24025
## 44       17956
## 45       14884
## 46       22500
## 47       16384
## 48       21609
## 49       15625
## 50       21316
## 51       15625
## 52       24336
## 53       11881
## 54       18496
## 55       17424
## 56       11025
## 57       13689
## 58       12321
## 59       31329
## 60       23104
## 61       23716
## 62       10000
## 63       47089
## 64       25921
## 65       18496
## 66       29241
## 67       27889
## 68       19881
## 69       34596
## 70       22801
## 71       52900
## 72       19321
## 73       30976
## 74       34969
## 75       14400
## 76       21025
## 77       24336
## 78       44944
## 79       31329
## 80       21316
## 81       16900
## 82       19600
## 83       21904
## 84       39204
## 85       18496
## 86       18769
## 87       27889
## 88       29584
## 89       53824
## 90       31329
## 91       36100
## 92       14884
## 93       64009
## 94       43264
## 95       28224
## 96       39601
## 97       26896
## 98       34225
## 99       41616
## 100      44521
## 101      39204
## 102      21609
## 103      24336
## 104      35721
## 105      16384
## 106      37636
## 107      40000
## 108      33489
## 109      47524
## 110      28561
## 111      25921
## 112      23409
## 113      31329
## 114      49729
## 115      16900
## 116      50625
## 117      46656
## 118      46225
## 119      48841
## 120      41209
## 121      48400
## 122      27225
## 123      53824
## 124      28224
## 125      34596
## 126      49729
## 127      47961
## 128      44944
## 129      36864
## 130      34969
## 131      56644
## 132      37249
## 133      34596
## 134      48841
## 135      37636
## 136      50176
## 137      42436
## 138      58081
## 139      50176
## 140      54756
## 141      30276
## 142      22801
## 143      35721
## 144      40000
## 145      16384
## 146      38416
## 147      51984
## 148      56169
## 149      49284
## 150      46225
## 151      30976
## 152      30625
## 153      35344
## 154      43264
## 155      33124
## 156      27889
## 157      29584
## 158      46656
## 159      44100
## 160      18225
## 161      42025
## 162      24025
## 163      45796
## 164      55225
## 165      16384
## 166      47524
## 167      27556
## 168      71289
## 169      28900
## 170      51529
## 171      34596
## 172      24649
## 173      26244
## 174      30976
## 175      17689
## 176      42025
## 177      22500
## 178      37636
## 179      47089
## 180      36481
## 181      48400
## 182      62001
## 183      45369
## 184      60025
## 185      33124
## 186      65536
## 187      51529
## 188      49729
## 189      50176
## 190      22201
## 191      82944
## 192      26244
## 193      48400
## 194      77841
## 195      21316
## 196      62500
## 197      94249
## 198      93636
## 199      58564
## 200      66049
## 201      46225
## 202      26569
## 203      47961
## 204      57121
## 205      27889
## 206      44100
## 207      47089
## 208      49729
## 209      61009
## 210      53361
## 211      20736
## 212      57121
## 213      38025
## 214      47961
## 215      36100
## 216      44100
## 217      49284
## 218      41209
## 219      33124
## 220      32041
## 221      48841
## 222      26569
## 223      36100
## 224      56169
## 225      24964
## 226      37636
## 227      51984
## 228      49284
## 229      30976
## 230      39601
## 231      39204
## 232      15376
## 233      32400
## 234      39601
## 235      58081
## 236      39204
## 237      49284
## 238      27889
## 239      68644
## 240      33124
## 241      29929
## 242      59049
## 243      29241
## 244      24025
## 245      22201
## 246      25281
## 247      24336
## 248      16129
## 249      22201
## 250      12100
## 251      45796
## 252      19044
## 253      36100
## 254      44944
## 255      20736
## 256      47961
## 257      31684
## 258      64516
## 259      29241
## 260      18769
## 261      42025
## 262      24964
## 263      23409
## 264      38809
## 265      33489
## 266      38809
## 267      19321
## 268      39204
## 269      40000
## 270      18496

Multiple Regression Model

# Regress wins on home runs (linear and squared), strikeouts, runs, runs
# allowed, ERA, and league, including an HR-by-league interaction.
# `HR * lgID` already expands to HR + lgID + HR:lgID, so the extra `+ HR` and
# `+ lgID` terms are redundant; they do not change the fit and are kept so the
# call matches the summary output shown in this report.
model.lm <- lm(
  formula = W ~ HR * lgID + HR + HR_squared + SO + R + RA + ERA + lgID,
  data = recent_data
)
model.lm %>%
  summary() %>%
  print()
## 
## Call:
## lm(formula = W ~ HR * lgID + HR + HR_squared + SO + R + RA + 
##     ERA + lgID, data = recent_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.9593 -2.6031 -0.0307  2.5162 13.7163 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.847e+01  6.552e+00  13.502  < 2e-16 ***
## HR           2.381e-02  5.235e-02   0.455 0.649624    
## lgIDNL      -9.407e-01  2.472e+00  -0.381 0.703825    
## HR_squared  -3.189e-05  1.293e-04  -0.247 0.805426    
## SO          -1.307e-03  2.722e-03  -0.480 0.631427    
## R            8.708e-02  5.696e-03  15.287  < 2e-16 ***
## RA          -7.252e-02  1.968e-02  -3.684 0.000279 ***
## ERA         -4.689e+00  3.155e+00  -1.486 0.138398    
## HR:lgIDNL    4.047e-03  1.339e-02   0.302 0.762745    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.11 on 261 degrees of freedom
## Multiple R-squared:  0.8982, Adjusted R-squared:  0.895 
## F-statistic: 287.7 on 8 and 261 DF,  p-value: < 2.2e-16

The model summary shows a multiple R-squared of 0.8982, which is close to 1 and therefore desirable. The coefficients for “R” (runs) and “RA” (runs allowed) have significant p-values. This makes sense: a team wins games by scoring more runs than its opponent, so we would expect these two columns to be important to the model. We can also see an overall significant p-value of < 2.2e-16. The low p-value and high R-squared value suggest that this is a good model.

Residual Analysis

Looking at the model's summary, the residuals have a median close to zero, minimum and maximum values of broadly similar magnitude, and first and third quartiles of similar magnitude, so the residuals appear to be approximately normally distributed and centered near 0.

# Scatter plot of the model's residuals (y-axis) against its fitted values
# (x-axis), used to look for patterns or non-constant variance.
plot(fitted(model.lm), resid(model.lm))

The residual plot above shows that the residuals seem to be randomly scattered around 0 without any discernible pattern, suggesting that the linear model may be a reasonable fit for the data.

# Normal Q-Q plot of the model residuals, with the reference line overlaid
model.lm %>%
  resid() %>%
  qqnorm()
model.lm %>%
  resid() %>%
  qqline()

The Q-Q plot above shows some divergence from the line at both ends, with the divergence becoming more extreme toward the end of the line, but the points follow the line very closely for the most part. This suggests that the residuals may not be perfectly normal, but they are close to normal.

# Draw the standard lm diagnostic plots in a 2 x 2 grid. par() returns the
# previous settings, which are restored afterwards so that later plots are not
# forced into the same grid layout.
old_par <- par(mfrow = c(2, 2))
plot(model.lm)
par(old_par)

Conclusion

In conclusion, this model seems to be a good fit for the data based on the R-squared value of 0.8982 and the significant overall p-value. The residual analysis showed a residual plot with no pattern, centered horizontally around 0, with what appears to be mostly constant variance. The Q-Q plot showed that the residuals are mostly normal, with some divergence at either end. The analysis shows that this model fits the data well and could be a usable model for predicting wins for MLB teams.