# Load the per-team season statistics and show the structure of the columns.
NBA <- read.csv(file = "NBA_test.csv")
str(object = NBA)
## 'data.frame': 28 obs. of 20 variables:
## $ SeasonEnd: int 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
## $ Team : chr "Atlanta Hawks" "Brooklyn Nets" "Charlotte Bobcats" "Chicago Bulls" ...
## $ Playoffs : int 1 1 0 1 0 0 1 0 1 1 ...
## $ W : int 44 49 21 45 24 41 57 29 47 45 ...
## $ PTS : int 8032 7944 7661 7641 7913 8293 8704 7778 8296 8688 ...
## $ oppPTS : int 7999 7798 8418 7615 8297 8342 8287 8105 8223 8403 ...
## $ FG : int 3084 2942 2823 2926 2993 3182 3339 2979 3130 3124 ...
## $ FGA : int 6644 6544 6649 6698 6901 6892 6983 6638 6840 6782 ...
## $ X2P : int 2378 2314 2354 2480 2446 2576 2818 2466 2472 2257 ...
## $ X2PA : int 4743 4784 5250 5433 5320 5264 5465 5198 5208 4413 ...
## $ X3P : int 706 628 469 446 547 606 521 513 658 867 ...
## $ X3PA : int 1901 1760 1399 1265 1581 1628 1518 1440 1632 2369 ...
## $ FT : int 1158 1432 1546 1343 1380 1323 1505 1307 1378 1573 ...
## $ FTA : int 1619 1958 2060 1738 1826 1669 2148 1870 1744 2087 ...
## $ ORB : int 758 1047 917 1026 1004 767 1092 991 885 909 ...
## $ DRB : int 2593 2460 2389 2514 2359 2670 2601 2463 2801 2652 ...
## $ AST : int 2007 1668 1587 1886 1694 1906 2002 1742 1845 1902 ...
## $ STL : int 664 599 591 588 647 648 762 574 567 679 ...
## $ BLK : int 369 391 479 417 334 454 533 400 346 359 ...
## $ TOV : int 1219 1206 1153 1171 1149 1144 1253 1241 1236 1348 ...
# Cross-tabulate win counts (rows) against the 0/1 Playoffs flag (columns).
table(NBA[["W"]], NBA[["Playoffs"]])
##
## 0 1
## 20 1 0
## 21 1 0
## 24 1 0
## 25 1 0
## 27 1 0
## 28 1 0
## 29 2 0
## 31 1 0
## 33 1 0
## 34 2 0
## 38 0 1
## 41 1 0
## 43 1 0
## 44 0 1
## 45 0 3
## 47 0 1
## 49 0 1
## 54 0 1
## 56 0 2
## 57 0 1
## 58 0 1
## 60 0 1
## 66 0 1
# Points differential: the team's points minus its opponents' points.
NBA[["PTSdiff"]] <- NBA[["PTS"]] - NBA[["oppPTS"]]
# The `$` expressions are kept here because plot() builds its default axis
# labels from the argument expressions.
plot(x = NBA$PTSdiff, y = NBA$W)

# Simple linear regression of wins on the points differential.
WinsReg <- lm(formula = W ~ PTSdiff, data = NBA)
summary(object = WinsReg)
##
## Call:
## lm(formula = W ~ PTSdiff, data = NBA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8754 -2.5961 0.9201 2.2031 4.8604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.040406 0.607890 67.51 <2e-16 ***
## PTSdiff 0.032894 0.001604 20.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.215 on 26 degrees of freedom
## Multiple R-squared: 0.9418, Adjusted R-squared: 0.9396
## F-statistic: 420.7 on 1 and 26 DF, p-value: < 2.2e-16
# Regress total points on shot attempts and the remaining box-score counts
# (nine predictors).
PointsReg <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + TOV + STL + BLK,
  data = NBA
)
summary(object = PointsReg)
##
## Call:
## lm(formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + TOV +
## STL + BLK, data = NBA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -277.583 -48.246 9.944 83.547 219.341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1355.9913 1547.7731 0.876 0.39252
## X2PA 0.3160 0.2534 1.247 0.22832
## X3PA 0.8665 0.2706 3.202 0.00495 **
## FTA 0.5168 0.2586 1.999 0.06097 .
## AST 0.1528 0.4656 0.328 0.74648
## ORB -0.5648 0.4647 -1.215 0.23995
## DRB 0.6215 0.3994 1.556 0.13706
## TOV 0.1261 0.6609 0.191 0.85078
## STL 1.3077 0.6921 1.890 0.07503 .
## BLK 0.9798 0.6030 1.625 0.12158
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 162.7 on 18 degrees of freedom
## Multiple R-squared: 0.8463, Adjusted R-squared: 0.7695
## F-statistic: 11.01 on 9 and 18 DF, p-value: 1.153e-05
# Print the per-observation residuals of the nine-predictor points model.
residuals(PointsReg)
## 1 2 3 4 5 6
## -180.630948 28.156228 -228.031804 -22.774337 85.857324 46.177369
## 7 8 9 10 11 12
## 212.935479 28.720891 219.341079 -29.135134 -31.077259 -138.311043
## 13 14 15 16 17 18
## 6.116924 143.518156 -161.078281 -206.038514 144.414208 -16.697863
## 19 20 21 22 23 24
## 111.934115 33.608133 -34.540197 -89.361453 82.776819 215.365020
## 25 26 27 28
## -7.454091 50.021754 13.770897 -277.583472
# Sum of squared residuals of PointsReg.
SSE <- sum(residuals(PointsReg)^2)
SSE
## [1] 476701.4
# Root mean squared error; the divisor is the number of rows in NBA.
RMSE <- sqrt(SSE / nrow(NBA))
RMSE
## [1] 130.4801
# Mean and maximum of total points, then the mean points differential.
mean(NBA[["PTS"]])
## [1] 8061.821
max(NBA[["PTS"]])
## [1] 8704
mean(NBA[["PTSdiff"]])
## [1] -11
# Show the nine-predictor model again before the reduced models are fitted.
summary(object = PointsReg)
##
## Call:
## lm(formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + TOV +
## STL + BLK, data = NBA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -277.583 -48.246 9.944 83.547 219.341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1355.9913 1547.7731 0.876 0.39252
## X2PA 0.3160 0.2534 1.247 0.22832
## X3PA 0.8665 0.2706 3.202 0.00495 **
## FTA 0.5168 0.2586 1.999 0.06097 .
## AST 0.1528 0.4656 0.328 0.74648
## ORB -0.5648 0.4647 -1.215 0.23995
## DRB 0.6215 0.3994 1.556 0.13706
## TOV 0.1261 0.6609 0.191 0.85078
## STL 1.3077 0.6921 1.890 0.07503 .
## BLK 0.9798 0.6030 1.625 0.12158
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 162.7 on 18 degrees of freedom
## Multiple R-squared: 0.8463, Adjusted R-squared: 0.7695
## F-statistic: 11.01 on 9 and 18 DF, p-value: 1.153e-05
# Reduced model: the PointsReg formula with TOV removed.
PointsReg2 <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + STL + BLK,
  data = NBA
)
summary(object = PointsReg2)
##
## Call:
## lm(formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + STL +
## BLK, data = NBA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -271.71 -51.30 10.46 85.35 221.61
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1386.3365 1500.0368 0.924 0.36697
## X2PA 0.3152 0.2469 1.277 0.21708
## X3PA 0.8625 0.2629 3.281 0.00393 **
## FTA 0.5394 0.2241 2.407 0.02643 *
## AST 0.1965 0.3951 0.497 0.62474
## ORB -0.5471 0.4437 -1.233 0.23260
## DRB 0.6321 0.3854 1.640 0.11742
## STL 1.2663 0.6403 1.978 0.06264 .
## BLK 0.9658 0.5832 1.656 0.11411
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 158.6 on 19 degrees of freedom
## Multiple R-squared: 0.846, Adjusted R-squared: 0.7812
## F-statistic: 13.05 on 8 and 19 DF, p-value: 3.068e-06
# Reduced model: the PointsReg2 formula with DRB removed.
PointsReg3 <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + STL + BLK,
  data = NBA
)
summary(object = PointsReg3)
##
## Call:
## lm(formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + STL + BLK,
## data = NBA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -293.99 -61.35 7.48 69.67 345.57
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1777.5510 1542.2627 1.153 0.262681
## X2PA 0.4258 0.2473 1.722 0.100557
## X3PA 1.0085 0.2576 3.915 0.000858 ***
## FTA 0.6855 0.2141 3.201 0.004482 **
## AST 0.5472 0.3460 1.581 0.129519
## ORB -0.8026 0.4327 -1.855 0.078404 .
## STL 0.7289 0.5728 1.272 0.217839
## BLK 1.1608 0.5946 1.952 0.065043 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 165.1 on 20 degrees of freedom
## Multiple R-squared: 0.8242, Adjusted R-squared: 0.7627
## F-statistic: 13.39 on 7 and 20 DF, p-value: 2.589e-06
# Reduced model: the PointsReg3 formula with BLK removed.
PointsReg4 <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + STL,
  data = NBA
)
summary(object = PointsReg4)
##
## Call:
## lm(formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + STL, data = NBA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -338.56 -54.78 -7.23 76.22 359.24
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1588.8677 1639.0327 0.969 0.343382
## X2PA 0.4617 0.2626 1.758 0.093254 .
## X3PA 0.8920 0.2668 3.343 0.003086 **
## FTA 0.8768 0.2027 4.325 0.000299 ***
## AST 0.6997 0.3590 1.949 0.064752 .
## ORB -0.8902 0.4583 -1.943 0.065593 .
## STL 0.9466 0.5983 1.582 0.128571
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 175.8 on 21 degrees of freedom
## Multiple R-squared: 0.7907, Adjusted R-squared: 0.7309
## F-statistic: 13.22 on 6 and 21 DF, p-value: 3.473e-06
# Sum of squared residuals and RMSE (divisor: rows of NBA) for PointsReg4.
SSE_4 <- sum(residuals(PointsReg4)^2)
RMSE_4 <- sqrt(SSE_4 / nrow(NBA))
SSE_4
## [1] 649219.9
RMSE_4
## [1] 152.2709
# Predict points with PointsReg4 and compute R-squared against the actual PTS.
# predict.lm() reads new observations from `newdata`; an argument named `data`
# is absorbed by `...` and ignored, so the call would silently return the
# fitted values of the model instead of predictions for the supplied frame.
# NOTE(review): NBA is read from "NBA_test.csv" and PointsReg4 is fitted on
# NBA as well, so this R-squared is in-sample -- confirm whether the model was
# meant to be fitted on a separate training file.
PointsPredictions <- predict(PointsReg4, newdata = NBA)
# NOTE: this reassigns SSE, replacing the value computed for PointsReg.
SSE <- sum((PointsPredictions - NBA$PTS)^2)
# Total sum of squares around the mean of the observed points.
SST <- sum((mean(NBA$PTS) - NBA$PTS)^2)
R2 <- 1 - SSE / SST
R2
## [1] 0.7906947