library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rvest)
library(ggplot2)
# Load the team table from the local text file mlb2022.txt.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where the file exists at exactly this location.
df <- read.csv("/Users/mathew.katz/Desktop/CUNYSPS/mlb2022.txt")
# Discard the first row that read.csv() produced; the remaining rows keep
# their original row names. NOTE(review): the reason row 1 is dropped is not
# visible here -- confirm against the raw file.
df <- df[-1, ]
# Display the resulting data frame.
df
##    Rk Season Team Lg   W  GP W.1   L   WL.  ERA   G CG SHO SV     IP    H   R
## 2   1   2022  LAD NL 111 162 111  51 0.685 2.80 162  1   1 43 1451.1 1114 513
## 3   2   2022  HOU AL 106 162 106  56 0.654 2.90 162  3   1 53 1445.0 1121 518
## 4   3   2022  ATL NL 101 162 101  61 0.623 3.46 162  1   1 55 1448.0 1224 609
## 5   4   2022  NYM NL 101 162 101  61 0.623 3.58 162  0   0 41 1438.2 1274 606
## 6   5   2022  NYY AL  99 162  99  63 0.611 3.30 162  1   1 47 1451.2 1177 567
## 7   6   2022  STL NL  93 162  93  69 0.574 3.79 162  3   1 37 1436.0 1335 637
## 8   7   2022  TOR AL  92 162  92  70 0.568 3.89 162  0   0 46 1441.2 1356 679
## 9   8   2022  CLE AL  92 162  92  70 0.568 3.47 162  1   0 51 1456.0 1252 634
## 10  9   2022  SEA AL  90 162  90  72 0.556 3.59 162  0   0 40 1447.0 1277 623
## 11 10   2022  SDP NL  89 162  89  73 0.549 3.81 162  0   0 48 1443.1 1263 660
## 12 11   2022  PHI NL  87 162  87  75 0.537 3.98 162  3   1 42 1428.1 1330 685
## 13 12   2022  TBR AL  86 162  86  76 0.531 3.41 162  0   0 44 1435.2 1260 614
## 14 13   2022  MIL NL  86 162  86  76 0.531 3.83 162  0   0 52 1446.0 1238 688
## 15 14   2022  BAL AL  83 162  83  79 0.512 3.97 162  2   1 46 1433.2 1406 688
## 16 15   2022  CHW AL  81 162  81  81 0.500 3.94 162  2   1 48 1447.2 1330 717
## 17 16   2022  SFG NL  81 162  81  81 0.500 3.86 162  1   0 39 1433.0 1397 697
## 18 17   2022  BOS AL  78 162  78  84 0.481 4.54 162  5   2 39 1430.2 1411 787
## 19 18   2022  MIN AL  78 162  78  84 0.481 3.98 162  0   0 28 1437.1 1320 684
## 20 19   2022  ARI NL  74 162  74  88 0.457 4.26 162  0   0 33 1429.2 1345 740
## 21 20   2022  CHC NL  74 162  74  88 0.457 4.03 162  0   0 44 1444.0 1342 731
## 22 21   2022  LAA AL  73 162  73  89 0.451 3.79 162  2   2 38 1435.2 1241 668
## 23 22   2022  MIA NL  69 162  69  93 0.426 3.87 162  6   1 41 1437.2 1311 676
## 24 23   2022  COL NL  68 162  68  94 0.420 5.08 162  1   1 43 1425.1 1516 873
## 25 24   2022  TEX AL  68 162  68  94 0.420 4.22 162  1   1 37 1435.0 1345 743
## 26 25   2022  DET AL  66 162  66  96 0.407 4.04 162  0   0 38 1419.2 1336 713
## 27 26   2022  KCR AL  65 162  65  97 0.401 4.72 162  0   0 33 1416.0 1493 810
## 28 27   2022  PIT NL  62 162  62 100 0.383 4.66 162  0   0 33 1421.1 1432 817
## 29 28   2022  CIN NL  62 162  62 100 0.383 4.86 162  1   1 31 1423.2 1366 815
## 30 29   2022  OAK AL  60 162  60 102 0.370 4.54 162  0   0 34 1426.1 1394 770
## 31 30   2022  WSN NL  55 162  55 107 0.340 5.00 162  2   0 28 1411.2 1469 855
##     ER  HR  BB IBB   SO HBP BK WP   BF ERA.  FIP  WHIP  H9 HR9 BB9 SO9 SO.BB
## 2  451 152 407  13 1465  75  3 38 5865  149 3.45 1.048 6.9 0.9 2.5 9.1  3.60
## 3  465 134 458   6 1524  60  6 56 5856  134 3.28 1.093 7.0 0.8 2.9 9.5  3.33
## 4  556 148 500  21 1554  62  4 55 6031  121 3.46 1.191 7.6 0.9 3.1 9.7  3.11
## 5  573 169 428  13 1565  71  2 35 5950  108 3.50 1.183 8.0 1.1 2.7 9.8  3.66
## 6  533 157 444  10 1459  65  5 40 5938  119 3.56 1.117 7.3 1.0 2.8 9.0  3.29
## 7  605 146 489  11 1177  60  3 43 6014  101 3.94 1.270 8.4 0.9 3.1 7.4  2.41
## 8  623 180 424  15 1390  76  5 29 6053  100 3.85 1.235 8.5 1.1 2.6 8.7  3.28
## 9  562 172 435  14 1390  57  2 49 5989  110 3.75 1.159 7.7 1.1 2.7 8.6  3.20
## 10 577 186 447  24 1391  56  0 45 5986  105 3.90 1.191 7.9 1.2 2.8 8.7  3.11
## 11 611 173 468   6 1451  88  5 54 6047  100 3.82 1.199 7.9 1.1 2.9 9.0  3.10
## 12 631 150 463  16 1423  68  3 47 6006  102 3.60 1.255 8.4 0.9 2.9 9.0  3.07
## 13 544 172 384  15 1384  66  4 54 5930  108 3.68 1.145 7.9 1.1 2.4 8.7  3.60
## 14 615 190 521  12 1530  67  4 47 6057  104 3.92 1.216 7.7 1.2 3.2 9.5  2.94
## 15 633 171 443   8 1214  64  4 47 6058  102 4.03 1.290 8.8 1.1 2.8 7.6  2.74
## 16 633 166 533  15 1450  51  6 64 6145  102 3.81 1.287 8.3 1.0 3.3 9.0  2.72
## 17 615 132 441  16 1370  52  2 53 6070  106 3.43 1.283 8.8 0.8 2.8 8.6  3.11
## 18 721 185 526  17 1346  72  8 60 6167   93 4.17 1.354 8.9 1.2 3.3 8.5  2.56
## 19 636 184 468  19 1336  66  4 50 6042   98 4.03 1.244 8.3 1.2 2.9 8.4  2.85
## 20 676 191 504  18 1216  59  3 51 6065   96 4.33 1.293 8.5 1.2 3.2 7.7  2.41
## 21 646 207 540  19 1383  73  8 53 6162  102 4.33 1.303 8.4 1.3 3.4 8.6  2.56
## 22 604 168 540  23 1383  60  3 64 6038  108 3.96 1.241 7.8 1.1 3.4 8.7  2.56
## 23 618 173 511  19 1437  76  3 54 6056  105 3.90 1.267 8.2 1.1 3.2 9.0  2.81
## 24 804 184 539  12 1187  59  3 65 6240   92 4.38 1.442 9.6 1.2 3.4 7.5  2.20
## 25 673 169 581  16 1314  71  7 66 6167   94 4.17 1.342 8.4 1.1 3.6 8.2  2.26
## 26 637 167 511   9 1195  57  2 59 6047   94 4.16 1.301 8.5 1.1 3.2 7.6  2.34
## 27 742 173 589  15 1191  71  6 88 6249   86 4.42 1.470 9.5 1.1 3.7 7.6  2.02
## 28 736 164 586  23 1250  87  5 62 6263   88 4.27 1.420 9.1 1.0 3.7 7.9  2.13
## 29 769 213 612  21 1414 110  5 58 6220   93 4.59 1.389 8.6 1.3 3.9 8.9  2.31
## 30 719 195 503  37 1203  72  5 62 6121   83 4.41 1.330 8.8 1.2 3.2 7.6  2.39
## 31 785 244 558  12 1220  75  2 59 6220   78 4.98 1.436 9.4 1.6 3.6 7.8  2.19
# Per-column summary of the loaded data frame.
summary(object = df)
##        Rk            Season         Team                Lg           
##  Min.   : 1.00   Min.   :2022   Length:30          Length:30         
##  1st Qu.: 8.25   1st Qu.:2022   Class :character   Class :character  
##  Median :15.50   Median :2022   Mode  :character   Mode  :character  
##  Mean   :15.50   Mean   :2022                                        
##  3rd Qu.:22.75   3rd Qu.:2022                                        
##  Max.   :30.00   Max.   :2022                                        
##        W                GP           W.1               L         
##  Min.   : 55.00   Min.   :162   Min.   : 55.00   Min.   : 51.00  
##  1st Qu.: 68.25   1st Qu.:162   1st Qu.: 68.25   1st Qu.: 70.50  
##  Median : 81.00   Median :162   Median : 81.00   Median : 81.00  
##  Mean   : 81.00   Mean   :162   Mean   : 81.00   Mean   : 81.00  
##  3rd Qu.: 91.50   3rd Qu.:162   3rd Qu.: 91.50   3rd Qu.: 93.75  
##  Max.   :111.00   Max.   :162   Max.   :111.00   Max.   :107.00  
##       WL.              ERA              G             CG           SHO        
##  Min.   :0.3400   Min.   :2.800   Min.   :162   Min.   :0.0   Min.   :0.0000  
##  1st Qu.:0.4215   1st Qu.:3.640   1st Qu.:162   1st Qu.:0.0   1st Qu.:0.0000  
##  Median :0.5000   Median :3.915   Median :162   Median :1.0   Median :0.0000  
##  Mean   :0.5000   Mean   :3.972   Mean   :162   Mean   :1.2   Mean   :0.5333  
##  3rd Qu.:0.5650   3rd Qu.:4.250   3rd Qu.:162   3rd Qu.:2.0   3rd Qu.:1.0000  
##  Max.   :0.6850   Max.   :5.080   Max.   :162   Max.   :6.0   Max.   :2.0000  
##        SV              IP             H              R               ER       
##  Min.   :28.00   Min.   :1411   Min.   :1114   Min.   :513.0   Min.   :451.0  
##  1st Qu.:37.00   1st Qu.:1428   1st Qu.:1261   1st Qu.:634.8   1st Qu.:583.8  
##  Median :41.00   Median :1436   Median :1332   Median :686.5   Median :627.0  
##  Mean   :41.07   Mean   :1436   Mean   :1322   Mean   :693.9   Mean   :633.1  
##  3rd Qu.:46.00   3rd Qu.:1445   3rd Qu.:1387   3rd Qu.:742.2   3rd Qu.:675.2  
##  Max.   :55.00   Max.   :1456   Max.   :1516   Max.   :873.0   Max.   :804.0  
##        HR              BB             IBB              SO      
##  Min.   :132.0   Min.   :384.0   Min.   : 6.00   Min.   :1177  
##  1st Qu.:164.5   1st Qu.:444.8   1st Qu.:12.00   1st Qu.:1228  
##  Median :172.0   Median :501.5   Median :15.00   Median :1384  
##  Mean   :173.8   Mean   :495.1   Mean   :15.83   Mean   :1360  
##  3rd Qu.:184.8   3rd Qu.:537.5   3rd Qu.:19.00   3rd Qu.:1447  
##  Max.   :244.0   Max.   :612.0   Max.   :37.00   Max.   :1565  
##       HBP               BK              WP              BF      
##  Min.   : 51.00   Min.   :0.000   Min.   :29.00   Min.   :5856  
##  1st Qu.: 60.00   1st Qu.:3.000   1st Qu.:47.00   1st Qu.:6008  
##  Median : 66.50   Median :4.000   Median :54.00   Median :6054  
##  Mean   : 68.20   Mean   :4.067   Mean   :53.57   Mean   :6068  
##  3rd Qu.: 72.75   3rd Qu.:5.000   3rd Qu.:59.75   3rd Qu.:6158  
##  Max.   :110.00   Max.   :8.000   Max.   :88.00   Max.   :6263  
##       ERA.            FIP             WHIP             H9       
##  Min.   : 78.0   Min.   :3.280   Min.   :1.048   Min.   :6.900  
##  1st Qu.: 94.0   1st Qu.:3.697   1st Qu.:1.193   1st Qu.:7.900  
##  Median :102.0   Median :3.930   Median :1.268   Median :8.400  
##  Mean   :102.7   Mean   :3.969   Mean   :1.266   Mean   :8.303  
##  3rd Qu.:107.5   3rd Qu.:4.245   3rd Qu.:1.323   3rd Qu.:8.750  
##  Max.   :149.0   Max.   :4.980   Max.   :1.470   Max.   :9.600  
##       HR9             BB9             SO9            SO.BB      
##  Min.   :0.800   Min.   :2.400   Min.   :7.400   Min.   :2.020  
##  1st Qu.:1.000   1st Qu.:2.800   1st Qu.:7.825   1st Qu.:2.395  
##  Median :1.100   Median :3.150   Median :8.650   Median :2.775  
##  Mean   :1.097   Mean   :3.107   Mean   :8.530   Mean   :2.795  
##  3rd Qu.:1.200   3rd Qu.:3.375   3rd Qu.:9.000   3rd Qu.:3.110  
##  Max.   :1.600   Max.   :3.900   Max.   :9.800   Max.   :3.660
# Simple linear regression of the W column on the ERA column of df
# (presumably team wins on earned run average -- confirm against the data
# source). The fitted model is kept in mlb_lm for the plots further down.
mlb_lm <- lm(formula = W ~ ERA, data = df)

# Coefficient table, residual quantiles and fit statistics for the model.
summary(object = mlb_lm)
## 
## Call:
## lm(formula = W ~ ERA, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.3444  -3.1407   0.3374   3.9965  12.3757 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  172.003      9.538  18.033  < 2e-16 ***
## ERA          -22.909      2.378  -9.632 2.19e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.195 on 28 degrees of freedom
## Multiple R-squared:  0.7682, Adjusted R-squared:  0.7599 
## F-statistic: 92.78 on 1 and 28 DF,  p-value: 2.186e-10
# Scatter plot of W against ERA with the fitted regression line overlaid.
plot(W ~ ERA, data = df)
# Draw the line from the already-fitted mlb_lm instead of refitting the
# identical regression, so the line always matches the summarized model.
abline(reg = mlb_lm)

# Residuals of mlb_lm plotted in row order as filled red points.
plot(mlb_lm$residuals, pch = 16, col = "red")