library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rvest)
library(ggplot2)
# Load the 2022 MLB team table from a local text file.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where that exact file exists -- consider a project-relative path.
df <- read.csv("/Users/mathew.katz/Desktop/CUNYSPS/mlb2022.txt")
# Discard the first row of the imported table.
# NOTE(review): the reason row 1 is dropped is not visible here -- confirm
# against the raw file before relying on it.
df <- df[-1, ]
# Auto-print the resulting data frame for inspection.
df
## Rk Season Team Lg W GP W.1 L WL. ERA G CG SHO SV IP H R
## 2 1 2022 LAD NL 111 162 111 51 0.685 2.80 162 1 1 43 1451.1 1114 513
## 3 2 2022 HOU AL 106 162 106 56 0.654 2.90 162 3 1 53 1445.0 1121 518
## 4 3 2022 ATL NL 101 162 101 61 0.623 3.46 162 1 1 55 1448.0 1224 609
## 5 4 2022 NYM NL 101 162 101 61 0.623 3.58 162 0 0 41 1438.2 1274 606
## 6 5 2022 NYY AL 99 162 99 63 0.611 3.30 162 1 1 47 1451.2 1177 567
## 7 6 2022 STL NL 93 162 93 69 0.574 3.79 162 3 1 37 1436.0 1335 637
## 8 7 2022 TOR AL 92 162 92 70 0.568 3.89 162 0 0 46 1441.2 1356 679
## 9 8 2022 CLE AL 92 162 92 70 0.568 3.47 162 1 0 51 1456.0 1252 634
## 10 9 2022 SEA AL 90 162 90 72 0.556 3.59 162 0 0 40 1447.0 1277 623
## 11 10 2022 SDP NL 89 162 89 73 0.549 3.81 162 0 0 48 1443.1 1263 660
## 12 11 2022 PHI NL 87 162 87 75 0.537 3.98 162 3 1 42 1428.1 1330 685
## 13 12 2022 TBR AL 86 162 86 76 0.531 3.41 162 0 0 44 1435.2 1260 614
## 14 13 2022 MIL NL 86 162 86 76 0.531 3.83 162 0 0 52 1446.0 1238 688
## 15 14 2022 BAL AL 83 162 83 79 0.512 3.97 162 2 1 46 1433.2 1406 688
## 16 15 2022 CHW AL 81 162 81 81 0.500 3.94 162 2 1 48 1447.2 1330 717
## 17 16 2022 SFG NL 81 162 81 81 0.500 3.86 162 1 0 39 1433.0 1397 697
## 18 17 2022 BOS AL 78 162 78 84 0.481 4.54 162 5 2 39 1430.2 1411 787
## 19 18 2022 MIN AL 78 162 78 84 0.481 3.98 162 0 0 28 1437.1 1320 684
## 20 19 2022 ARI NL 74 162 74 88 0.457 4.26 162 0 0 33 1429.2 1345 740
## 21 20 2022 CHC NL 74 162 74 88 0.457 4.03 162 0 0 44 1444.0 1342 731
## 22 21 2022 LAA AL 73 162 73 89 0.451 3.79 162 2 2 38 1435.2 1241 668
## 23 22 2022 MIA NL 69 162 69 93 0.426 3.87 162 6 1 41 1437.2 1311 676
## 24 23 2022 COL NL 68 162 68 94 0.420 5.08 162 1 1 43 1425.1 1516 873
## 25 24 2022 TEX AL 68 162 68 94 0.420 4.22 162 1 1 37 1435.0 1345 743
## 26 25 2022 DET AL 66 162 66 96 0.407 4.04 162 0 0 38 1419.2 1336 713
## 27 26 2022 KCR AL 65 162 65 97 0.401 4.72 162 0 0 33 1416.0 1493 810
## 28 27 2022 PIT NL 62 162 62 100 0.383 4.66 162 0 0 33 1421.1 1432 817
## 29 28 2022 CIN NL 62 162 62 100 0.383 4.86 162 1 1 31 1423.2 1366 815
## 30 29 2022 OAK AL 60 162 60 102 0.370 4.54 162 0 0 34 1426.1 1394 770
## 31 30 2022 WSN NL 55 162 55 107 0.340 5.00 162 2 0 28 1411.2 1469 855
## ER HR BB IBB SO HBP BK WP BF ERA. FIP WHIP H9 HR9 BB9 SO9 SO.BB
## 2 451 152 407 13 1465 75 3 38 5865 149 3.45 1.048 6.9 0.9 2.5 9.1 3.60
## 3 465 134 458 6 1524 60 6 56 5856 134 3.28 1.093 7.0 0.8 2.9 9.5 3.33
## 4 556 148 500 21 1554 62 4 55 6031 121 3.46 1.191 7.6 0.9 3.1 9.7 3.11
## 5 573 169 428 13 1565 71 2 35 5950 108 3.50 1.183 8.0 1.1 2.7 9.8 3.66
## 6 533 157 444 10 1459 65 5 40 5938 119 3.56 1.117 7.3 1.0 2.8 9.0 3.29
## 7 605 146 489 11 1177 60 3 43 6014 101 3.94 1.270 8.4 0.9 3.1 7.4 2.41
## 8 623 180 424 15 1390 76 5 29 6053 100 3.85 1.235 8.5 1.1 2.6 8.7 3.28
## 9 562 172 435 14 1390 57 2 49 5989 110 3.75 1.159 7.7 1.1 2.7 8.6 3.20
## 10 577 186 447 24 1391 56 0 45 5986 105 3.90 1.191 7.9 1.2 2.8 8.7 3.11
## 11 611 173 468 6 1451 88 5 54 6047 100 3.82 1.199 7.9 1.1 2.9 9.0 3.10
## 12 631 150 463 16 1423 68 3 47 6006 102 3.60 1.255 8.4 0.9 2.9 9.0 3.07
## 13 544 172 384 15 1384 66 4 54 5930 108 3.68 1.145 7.9 1.1 2.4 8.7 3.60
## 14 615 190 521 12 1530 67 4 47 6057 104 3.92 1.216 7.7 1.2 3.2 9.5 2.94
## 15 633 171 443 8 1214 64 4 47 6058 102 4.03 1.290 8.8 1.1 2.8 7.6 2.74
## 16 633 166 533 15 1450 51 6 64 6145 102 3.81 1.287 8.3 1.0 3.3 9.0 2.72
## 17 615 132 441 16 1370 52 2 53 6070 106 3.43 1.283 8.8 0.8 2.8 8.6 3.11
## 18 721 185 526 17 1346 72 8 60 6167 93 4.17 1.354 8.9 1.2 3.3 8.5 2.56
## 19 636 184 468 19 1336 66 4 50 6042 98 4.03 1.244 8.3 1.2 2.9 8.4 2.85
## 20 676 191 504 18 1216 59 3 51 6065 96 4.33 1.293 8.5 1.2 3.2 7.7 2.41
## 21 646 207 540 19 1383 73 8 53 6162 102 4.33 1.303 8.4 1.3 3.4 8.6 2.56
## 22 604 168 540 23 1383 60 3 64 6038 108 3.96 1.241 7.8 1.1 3.4 8.7 2.56
## 23 618 173 511 19 1437 76 3 54 6056 105 3.90 1.267 8.2 1.1 3.2 9.0 2.81
## 24 804 184 539 12 1187 59 3 65 6240 92 4.38 1.442 9.6 1.2 3.4 7.5 2.20
## 25 673 169 581 16 1314 71 7 66 6167 94 4.17 1.342 8.4 1.1 3.6 8.2 2.26
## 26 637 167 511 9 1195 57 2 59 6047 94 4.16 1.301 8.5 1.1 3.2 7.6 2.34
## 27 742 173 589 15 1191 71 6 88 6249 86 4.42 1.470 9.5 1.1 3.7 7.6 2.02
## 28 736 164 586 23 1250 87 5 62 6263 88 4.27 1.420 9.1 1.0 3.7 7.9 2.13
## 29 769 213 612 21 1414 110 5 58 6220 93 4.59 1.389 8.6 1.3 3.9 8.9 2.31
## 30 719 195 503 37 1203 72 5 62 6121 83 4.41 1.330 8.8 1.2 3.2 7.6 2.39
## 31 785 244 558 12 1220 75 2 59 6220 78 4.98 1.436 9.4 1.6 3.6 7.8 2.19
# Per-column descriptive summary of the imported table.
summary(object = df)
## Rk Season Team Lg
## Min. : 1.00 Min. :2022 Length:30 Length:30
## 1st Qu.: 8.25 1st Qu.:2022 Class :character Class :character
## Median :15.50 Median :2022 Mode :character Mode :character
## Mean :15.50 Mean :2022
## 3rd Qu.:22.75 3rd Qu.:2022
## Max. :30.00 Max. :2022
## W GP W.1 L
## Min. : 55.00 Min. :162 Min. : 55.00 Min. : 51.00
## 1st Qu.: 68.25 1st Qu.:162 1st Qu.: 68.25 1st Qu.: 70.50
## Median : 81.00 Median :162 Median : 81.00 Median : 81.00
## Mean : 81.00 Mean :162 Mean : 81.00 Mean : 81.00
## 3rd Qu.: 91.50 3rd Qu.:162 3rd Qu.: 91.50 3rd Qu.: 93.75
## Max. :111.00 Max. :162 Max. :111.00 Max. :107.00
## WL. ERA G CG SHO
## Min. :0.3400 Min. :2.800 Min. :162 Min. :0.0 Min. :0.0000
## 1st Qu.:0.4215 1st Qu.:3.640 1st Qu.:162 1st Qu.:0.0 1st Qu.:0.0000
## Median :0.5000 Median :3.915 Median :162 Median :1.0 Median :0.0000
## Mean :0.5000 Mean :3.972 Mean :162 Mean :1.2 Mean :0.5333
## 3rd Qu.:0.5650 3rd Qu.:4.250 3rd Qu.:162 3rd Qu.:2.0 3rd Qu.:1.0000
## Max. :0.6850 Max. :5.080 Max. :162 Max. :6.0 Max. :2.0000
## SV IP H R ER
## Min. :28.00 Min. :1411 Min. :1114 Min. :513.0 Min. :451.0
## 1st Qu.:37.00 1st Qu.:1428 1st Qu.:1261 1st Qu.:634.8 1st Qu.:583.8
## Median :41.00 Median :1436 Median :1332 Median :686.5 Median :627.0
## Mean :41.07 Mean :1436 Mean :1322 Mean :693.9 Mean :633.1
## 3rd Qu.:46.00 3rd Qu.:1445 3rd Qu.:1387 3rd Qu.:742.2 3rd Qu.:675.2
## Max. :55.00 Max. :1456 Max. :1516 Max. :873.0 Max. :804.0
## HR BB IBB SO
## Min. :132.0 Min. :384.0 Min. : 6.00 Min. :1177
## 1st Qu.:164.5 1st Qu.:444.8 1st Qu.:12.00 1st Qu.:1228
## Median :172.0 Median :501.5 Median :15.00 Median :1384
## Mean :173.8 Mean :495.1 Mean :15.83 Mean :1360
## 3rd Qu.:184.8 3rd Qu.:537.5 3rd Qu.:19.00 3rd Qu.:1447
## Max. :244.0 Max. :612.0 Max. :37.00 Max. :1565
## HBP BK WP BF
## Min. : 51.00 Min. :0.000 Min. :29.00 Min. :5856
## 1st Qu.: 60.00 1st Qu.:3.000 1st Qu.:47.00 1st Qu.:6008
## Median : 66.50 Median :4.000 Median :54.00 Median :6054
## Mean : 68.20 Mean :4.067 Mean :53.57 Mean :6068
## 3rd Qu.: 72.75 3rd Qu.:5.000 3rd Qu.:59.75 3rd Qu.:6158
## Max. :110.00 Max. :8.000 Max. :88.00 Max. :6263
## ERA. FIP WHIP H9
## Min. : 78.0 Min. :3.280 Min. :1.048 Min. :6.900
## 1st Qu.: 94.0 1st Qu.:3.697 1st Qu.:1.193 1st Qu.:7.900
## Median :102.0 Median :3.930 Median :1.268 Median :8.400
## Mean :102.7 Mean :3.969 Mean :1.266 Mean :8.303
## 3rd Qu.:107.5 3rd Qu.:4.245 3rd Qu.:1.323 3rd Qu.:8.750
## Max. :149.0 Max. :4.980 Max. :1.470 Max. :9.600
## HR9 BB9 SO9 SO.BB
## Min. :0.800 Min. :2.400 Min. :7.400 Min. :2.020
## 1st Qu.:1.000 1st Qu.:2.800 1st Qu.:7.825 1st Qu.:2.395
## Median :1.100 Median :3.150 Median :8.650 Median :2.775
## Mean :1.097 Mean :3.107 Mean :8.530 Mean :2.795
## 3rd Qu.:1.200 3rd Qu.:3.375 3rd Qu.:9.000 3rd Qu.:3.110
## Max. :1.600 Max. :3.900 Max. :9.800 Max. :3.660
# Simple linear regression of W on ERA over the rows of df.
mlb_lm <- lm(formula = W ~ ERA, data = df)
# Report coefficients, residual quantiles, R-squared and the F-test.
summary(object = mlb_lm)
##
## Call:
## lm(formula = W ~ ERA, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.3444 -3.1407 0.3374 3.9965 12.3757
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 172.003 9.538 18.033 < 2e-16 ***
## ERA -22.909 2.378 -9.632 2.19e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.195 on 28 degrees of freedom
## Multiple R-squared: 0.7682, Adjusted R-squared: 0.7599
## F-statistic: 92.78 on 1 and 28 DF, p-value: 2.186e-10
# Scatter plot of W against ERA with the fitted regression line overlaid.
plot(W ~ ERA, data = df)
# Reuse the already-fitted model rather than refitting the same formula, so
# the drawn line is guaranteed to be the one summarised above.
abline(mlb_lm)

# Residuals in row order, taken through the residuals() accessor instead of
# reaching into the model object; explicit axis labels replace the raw
# expression that plot() would otherwise use as the y-axis title.
plot(
  residuals(mlb_lm),
  pch = 16, col = "red",
  xlab = "Index", ylab = "Residuals"
)
# Zero reference line to make the sign and spread of residuals easy to read.
abline(h = 0, lty = 2)
