# Load the team-season table; the path is relative, so NBA_test.csv must be
# in the current working directory.
NBA <- read.csv(file = "NBA_test.csv")
# Show column names, types and the first few values of each column.
str(NBA)
## 'data.frame':    28 obs. of  20 variables:
##  $ SeasonEnd: int  2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
##  $ Team     : chr  "Atlanta Hawks" "Brooklyn Nets" "Charlotte Bobcats" "Chicago Bulls" ...
##  $ Playoffs : int  1 1 0 1 0 0 1 0 1 1 ...
##  $ W        : int  44 49 21 45 24 41 57 29 47 45 ...
##  $ PTS      : int  8032 7944 7661 7641 7913 8293 8704 7778 8296 8688 ...
##  $ oppPTS   : int  7999 7798 8418 7615 8297 8342 8287 8105 8223 8403 ...
##  $ FG       : int  3084 2942 2823 2926 2993 3182 3339 2979 3130 3124 ...
##  $ FGA      : int  6644 6544 6649 6698 6901 6892 6983 6638 6840 6782 ...
##  $ X2P      : int  2378 2314 2354 2480 2446 2576 2818 2466 2472 2257 ...
##  $ X2PA     : int  4743 4784 5250 5433 5320 5264 5465 5198 5208 4413 ...
##  $ X3P      : int  706 628 469 446 547 606 521 513 658 867 ...
##  $ X3PA     : int  1901 1760 1399 1265 1581 1628 1518 1440 1632 2369 ...
##  $ FT       : int  1158 1432 1546 1343 1380 1323 1505 1307 1378 1573 ...
##  $ FTA      : int  1619 1958 2060 1738 1826 1669 2148 1870 1744 2087 ...
##  $ ORB      : int  758 1047 917 1026 1004 767 1092 991 885 909 ...
##  $ DRB      : int  2593 2460 2389 2514 2359 2670 2601 2463 2801 2652 ...
##  $ AST      : int  2007 1668 1587 1886 1694 1906 2002 1742 1845 1902 ...
##  $ STL      : int  664 599 591 588 647 648 762 574 567 679 ...
##  $ BLK      : int  369 391 479 417 334 454 533 400 346 359 ...
##  $ TOV      : int  1219 1206 1153 1171 1149 1144 1253 1241 1236 1348 ...
 # Cross-tabulate win totals (rows) against the Playoffs flag (columns).
 table(NBA[["W"]], NBA[["Playoffs"]])
##     
##      0 1
##   20 1 0
##   21 1 0
##   24 1 0
##   25 1 0
##   27 1 0
##   28 1 0
##   29 2 0
##   31 1 0
##   33 1 0
##   34 2 0
##   38 0 1
##   41 1 0
##   43 1 0
##   44 0 1
##   45 0 3
##   47 0 1
##   49 0 1
##   54 0 1
##   56 0 2
##   57 0 1
##   58 0 1
##   60 0 1
##   66 0 1
# Add a column with the gap between points scored and points allowed.
NBA[["PTSdiff"]] <- NBA[["PTS"]] - NBA[["oppPTS"]]
# Scatter plot of wins against the points gap. The argument expressions are
# kept as written because the default axis labels are taken from them.
plot(NBA$PTSdiff, NBA$W)

# Simple linear regression of wins on the points gap.
WinsReg <- lm(formula = W ~ PTSdiff, data = NBA)
summary(WinsReg)
## 
## Call:
## lm(formula = W ~ PTSdiff, data = NBA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.8754 -2.5961  0.9201  2.2031  4.8604 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 41.040406   0.607890   67.51   <2e-16 ***
## PTSdiff      0.032894   0.001604   20.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.215 on 26 degrees of freedom
## Multiple R-squared:  0.9418, Adjusted R-squared:  0.9396 
## F-statistic: 420.7 on 1 and 26 DF,  p-value: < 2.2e-16
# Regress points scored on nine box-score counts (attempts, assists,
# rebounds, turnovers, steals, blocks).
PointsReg <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + TOV + STL + BLK,
  data = NBA
)
summary(PointsReg)
## 
## Call:
## lm(formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + TOV + 
##     STL + BLK, data = NBA)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -277.583  -48.246    9.944   83.547  219.341 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 1355.9913  1547.7731   0.876  0.39252   
## X2PA           0.3160     0.2534   1.247  0.22832   
## X3PA           0.8665     0.2706   3.202  0.00495 **
## FTA            0.5168     0.2586   1.999  0.06097 . 
## AST            0.1528     0.4656   0.328  0.74648   
## ORB           -0.5648     0.4647  -1.215  0.23995   
## DRB            0.6215     0.3994   1.556  0.13706   
## TOV            0.1261     0.6609   0.191  0.85078   
## STL            1.3077     0.6921   1.890  0.07503 . 
## BLK            0.9798     0.6030   1.625  0.12158   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 162.7 on 18 degrees of freedom
## Multiple R-squared:  0.8463, Adjusted R-squared:  0.7695 
## F-statistic: 11.01 on 9 and 18 DF,  p-value: 1.153e-05
# Per-row residuals stored on the fitted PointsReg object.
PointsReg[["residuals"]]
##           1           2           3           4           5           6 
## -180.630948   28.156228 -228.031804  -22.774337   85.857324   46.177369 
##           7           8           9          10          11          12 
##  212.935479   28.720891  219.341079  -29.135134  -31.077259 -138.311043 
##          13          14          15          16          17          18 
##    6.116924  143.518156 -161.078281 -206.038514  144.414208  -16.697863 
##          19          20          21          22          23          24 
##  111.934115   33.608133  -34.540197  -89.361453   82.776819  215.365020 
##          25          26          27          28 
##   -7.454091   50.021754   13.770897 -277.583472
# Sum of squared residuals of PointsReg on the rows it was fitted to.
SSE <- sum(PointsReg[["residuals"]]^2)
SSE
## [1] 476701.4
# Root mean squared error; the divisor is the row count of NBA, not the
# residual degrees of freedom.
RMSE <- sqrt(SSE / nrow(NBA))
RMSE
## [1] 130.4801
# Average and maximum of PTS, then the average PTSdiff, over the rows of NBA.
mean(NBA[["PTS"]])
## [1] 8061.821
max(NBA[["PTS"]])
## [1] 8704
mean(NBA[["PTSdiff"]])
## [1] -11
# Print the summary of PointsReg a second time (the model is unchanged since
# it was fitted above), ahead of the reduced models that follow.
summary(PointsReg)
## 
## Call:
## lm(formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + TOV + 
##     STL + BLK, data = NBA)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -277.583  -48.246    9.944   83.547  219.341 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 1355.9913  1547.7731   0.876  0.39252   
## X2PA           0.3160     0.2534   1.247  0.22832   
## X3PA           0.8665     0.2706   3.202  0.00495 **
## FTA            0.5168     0.2586   1.999  0.06097 . 
## AST            0.1528     0.4656   0.328  0.74648   
## ORB           -0.5648     0.4647  -1.215  0.23995   
## DRB            0.6215     0.3994   1.556  0.13706   
## TOV            0.1261     0.6609   0.191  0.85078   
## STL            1.3077     0.6921   1.890  0.07503 . 
## BLK            0.9798     0.6030   1.625  0.12158   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 162.7 on 18 degrees of freedom
## Multiple R-squared:  0.8463, Adjusted R-squared:  0.7695 
## F-statistic: 11.01 on 9 and 18 DF,  p-value: 1.153e-05
# Same model as PointsReg with TOV removed from the predictors.
PointsReg2 <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + STL + BLK,
  data = NBA
)
summary(PointsReg2)
## 
## Call:
## lm(formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + STL + 
##     BLK, data = NBA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -271.71  -51.30   10.46   85.35  221.61 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 1386.3365  1500.0368   0.924  0.36697   
## X2PA           0.3152     0.2469   1.277  0.21708   
## X3PA           0.8625     0.2629   3.281  0.00393 **
## FTA            0.5394     0.2241   2.407  0.02643 * 
## AST            0.1965     0.3951   0.497  0.62474   
## ORB           -0.5471     0.4437  -1.233  0.23260   
## DRB            0.6321     0.3854   1.640  0.11742   
## STL            1.2663     0.6403   1.978  0.06264 . 
## BLK            0.9658     0.5832   1.656  0.11411   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 158.6 on 19 degrees of freedom
## Multiple R-squared:  0.846,  Adjusted R-squared:  0.7812 
## F-statistic: 13.05 on 8 and 19 DF,  p-value: 3.068e-06
# Same model as PointsReg2 with DRB removed from the predictors.
PointsReg3 <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + STL + BLK,
  data = NBA
)
summary(PointsReg3)
## 
## Call:
## lm(formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + STL + BLK, 
##     data = NBA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -293.99  -61.35    7.48   69.67  345.57 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1777.5510  1542.2627   1.153 0.262681    
## X2PA           0.4258     0.2473   1.722 0.100557    
## X3PA           1.0085     0.2576   3.915 0.000858 ***
## FTA            0.6855     0.2141   3.201 0.004482 ** 
## AST            0.5472     0.3460   1.581 0.129519    
## ORB           -0.8026     0.4327  -1.855 0.078404 .  
## STL            0.7289     0.5728   1.272 0.217839    
## BLK            1.1608     0.5946   1.952 0.065043 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 165.1 on 20 degrees of freedom
## Multiple R-squared:  0.8242, Adjusted R-squared:  0.7627 
## F-statistic: 13.39 on 7 and 20 DF,  p-value: 2.589e-06
# Same model as PointsReg3 with BLK removed from the predictors.
PointsReg4 <- lm(
  formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + STL,
  data = NBA
)
summary(PointsReg4)
## 
## Call:
## lm(formula = PTS ~ X2PA + X3PA + FTA + AST + ORB + STL, data = NBA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -338.56  -54.78   -7.23   76.22  359.24 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1588.8677  1639.0327   0.969 0.343382    
## X2PA           0.4617     0.2626   1.758 0.093254 .  
## X3PA           0.8920     0.2668   3.343 0.003086 ** 
## FTA            0.8768     0.2027   4.325 0.000299 ***
## AST            0.6997     0.3590   1.949 0.064752 .  
## ORB           -0.8902     0.4583  -1.943 0.065593 .  
## STL            0.9466     0.5983   1.582 0.128571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 175.8 on 21 degrees of freedom
## Multiple R-squared:  0.7907, Adjusted R-squared:  0.7309 
## F-statistic: 13.22 on 6 and 21 DF,  p-value: 3.473e-06
# Sum of squared residuals of the six-predictor model PointsReg4.
SSE_4 <- sum(PointsReg4[["residuals"]]^2)
SSE_4
## [1] 649219.9
# Root mean squared error of PointsReg4, dividing by the row count of NBA.
RMSE_4 <- sqrt(SSE_4 / nrow(NBA))
RMSE_4
## [1] 152.2709
# Predict PTS for every row of NBA with PointsReg4. NBA is also the data
# frame PointsReg4 was fitted on, so the R2 below is an in-sample figure,
# not a held-out test-set score.
# predict() for lm objects receives its data through `newdata`; an argument
# spelled `data` is not recognised and is silently ignored.
PointsPredictions <- predict(PointsReg4, newdata = NBA)
# Note: this reuses the name SSE, overwriting the PointsReg value from above.
SSE <- sum((PointsPredictions - NBA$PTS)^2)
# Total sum of squares around the mean of PTS (the baseline model).
SST <- sum((mean(NBA$PTS) - NBA$PTS)^2)
# R-squared: share of the baseline error removed by the model.
R2 <- 1 - SSE / SST
R2
## [1] 0.7906947