## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Rows: 2455 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): TEAM, CONF, POSTSEASON
## dbl (21): G, W, ADJOE, ADJDE, BARTHAG, EFG_O, EFG_D, TOR, TORD, ORB, DRB, FT...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 × 25
##   TEAM       CONF      G     W ADJOE ADJDE BARTHAG EFG_O EFG_D   TOR  TORD   ORB
##   <chr>      <chr> <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 North Car… ACC      40    33  123.  94.9   0.953  52.6  48.1  15.4  18.2  40.7
## 2 Wisconsin  B10      40    36  129.  93.6   0.976  54.8  47.7  12.4  15.8  32.1
## 3 Michigan   B10      40    33  114.  90.4   0.938  53.9  47.7  14    19.5  25.5
## 4 Texas Tech B12      38    31  115.  85.2   0.970  53.5  43    17.7  22.8  27.4
## 5 Gonzaga    WCC      39    37  118.  86.3   0.973  56.6  41.1  16.2  17.1  30  
## 6 Kentucky   SEC      40    29  117.  96.2   0.906  49.9  46    18.1  16.1  42  
## # … with 13 more variables: DRB <dbl>, FTR <dbl>, FTRD <dbl>, `2P_O` <dbl>,
## #   `2P_D` <dbl>, `3P_O` <dbl>, `3P_D` <dbl>, ADJ_T <dbl>, WAB <dbl>,
## #   POSTSEASON <fct>, SEED <dbl>, YEAR <dbl>, WP <dbl>
## # A tibble: 6 × 25
##   TEAM       CONF      G     W ADJOE ADJDE BARTHAG EFG_O EFG_D   TOR  TORD   ORB
##   <chr>      <chr> <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 North Car… ACC      40    33  123.  94.9   0.953  52.6  48.1  15.4  18.2  40.7
## 2 Wisconsin  B10      40    36  129.  93.6   0.976  54.8  47.7  12.4  15.8  32.1
## 3 Michigan   B10      40    33  114.  90.4   0.938  53.9  47.7  14    19.5  25.5
## 4 Texas Tech B12      38    31  115.  85.2   0.970  53.5  43    17.7  22.8  27.4
## 5 Gonzaga    WCC      39    37  118.  86.3   0.973  56.6  41.1  16.2  17.1  30  
## 6 Kentucky   SEC      40    29  117.  96.2   0.906  49.9  46    18.1  16.1  42  
## # … with 13 more variables: DRB <dbl>, FTR <dbl>, FTRD <dbl>, `2P_O` <dbl>,
## #   `2P_D` <dbl>, `3P_O` <dbl>, `3P_D` <dbl>, ADJ_T <dbl>, WAB <dbl>,
## #   POSTSEASON <fct>, SEED <dbl>, YEAR <dbl>, WP <dbl>
#install.packages("ISLR")
library(ISLR)

#install.packages("rpart")
library(rpart)

#install.packages("MASS")
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Fix the RNG seed so the random train/test split below is reproducible.
set.seed(1)

# Randomly pick half of the row indices of bballsubset as the training set.
n.obs <- nrow(bballsubset)
train <- sample(seq_len(n.obs), n.obs / 2)

# Regression ("anova") tree for POSTSEASON on every other column of
# bballsubset, fitted on the training rows only.
tree.bballsubset <- rpart(
  POSTSEASON ~ .,
  data = bballsubset,
  subset = train,
  method = "anova"
)

# Print the CP table, variable importance, and per-node split details.
summary(tree.bballsubset)
## Call:
## rpart(formula = POSTSEASON ~ ., data = bballsubset, subset = train, 
##     method = "anova")
##   n=263 (964 observations deleted due to missingness)
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.32652326      0 1.0000000 1.0055888 0.13181827
## 2 0.08167923      1 0.6734767 0.7502098 0.09449891
## 3 0.06856329      2 0.5917975 0.7426239 0.09066088
## 4 0.02444941      3 0.5232342 0.6632891 0.07984608
## 5 0.02303258      5 0.4743354 0.7417943 0.08487301
## 6 0.01586788      6 0.4513028 0.7669824 0.08754729
## 7 0.01384799      8 0.4195671 0.7823116 0.08453252
## 8 0.01000000      9 0.4057191 0.7947551 0.08375125
## 
## Variable importance
## BARTHAG     WAB    SEED   ADJOE   ADJDE   EFG_D     DRB    FTRD     FTR     ORB 
##      30      18      15      11       7       5       3       2       2       2 
##     TOR    TORD    3P_O    3P_D 
##       1       1       1       1 
## 
## Node number 1: 263 observations,    complexity param=0.3265233
##   mean=6.136882, MSE=1.867195 
##   left son=2 (66 obs) right son=3 (197 obs)
##   Primary splits:
##       BARTHAG < 0.9158  to the right, improve=0.3265233, (0 missing)
##       WAB     < 1.55    to the right, improve=0.2996935, (0 missing)
##       SEED    < 4.5     to the left,  improve=0.2908791, (0 missing)
##       ADJOE   < 115.65  to the right, improve=0.1891627, (0 missing)
##       ADJDE   < 95.5    to the left,  improve=0.1842826, (0 missing)
##   Surrogate splits:
##       WAB   < 4.55    to the right, agree=0.897, adj=0.591, (0 split)
##       SEED  < 4.5     to the left,  agree=0.894, adj=0.576, (0 split)
##       ADJOE < 117.05  to the right, agree=0.852, adj=0.409, (0 split)
##       ADJDE < 92.55   to the left,  agree=0.810, adj=0.242, (0 split)
##       EFG_D < 44.95   to the left,  agree=0.802, adj=0.212, (0 split)
## 
## Node number 2: 66 observations,    complexity param=0.06856329
##   mean=4.787879, MSE=2.470156 
##   left son=4 (7 obs) right son=5 (59 obs)
##   Primary splits:
##       BARTHAG < 0.9655  to the right, improve=0.20652310, (0 missing)
##       SEED    < 1.5     to the left,  improve=0.08718610, (0 missing)
##       3P_D    < 29.95   to the left,  improve=0.07107393, (0 missing)
##       3P_O    < 34.2    to the left,  improve=0.07107393, (0 missing)
##       DRB     < 24.35   to the right, improve=0.07056896, (0 missing)
##   Surrogate splits:
##       WAB < 11.25   to the right, agree=0.909, adj=0.143, (0 split)
## 
## Node number 3: 197 observations,    complexity param=0.08167923
##   mean=6.588832, MSE=0.8512458 
##   left son=6 (75 obs) right son=7 (122 obs)
##   Primary splits:
##       WAB     < 1.55    to the right, improve=0.23918600, (0 missing)
##       BARTHAG < 0.86385 to the right, improve=0.17838540, (0 missing)
##       SEED    < 9.5     to the left,  improve=0.15442470, (0 missing)
##       ADJDE   < 99      to the left,  improve=0.09321217, (0 missing)
##       ADJOE   < 109.65  to the right, improve=0.08905300, (0 missing)
##   Surrogate splits:
##       SEED    < 8.5     to the left,  agree=0.858, adj=0.627, (0 split)
##       BARTHAG < 0.84275 to the right, agree=0.843, adj=0.587, (0 split)
##       ADJOE   < 111.15  to the right, agree=0.751, adj=0.347, (0 split)
##       ADJDE   < 93.5    to the left,  agree=0.701, adj=0.213, (0 split)
##       ORB     < 35.5    to the right, agree=0.660, adj=0.107, (0 split)
## 
## Node number 4: 7 observations
##   mean=2.714286, MSE=3.346939 
## 
## Node number 5: 59 observations,    complexity param=0.02444941
##   mean=5.033898, MSE=1.795461 
##   left son=10 (49 obs) right son=11 (10 obs)
##   Primary splits:
##       DRB   < 25.7    to the right, improve=0.10608980, (0 missing)
##       ADJ_T < 63.85   to the right, improve=0.09357120, (0 missing)
##       FTR   < 34.75   to the left,  improve=0.09161751, (0 missing)
##       2P_O  < 48.65   to the left,  improve=0.08014462, (0 missing)
##       3P_O  < 34.65   to the left,  improve=0.07217294, (0 missing)
##   Surrogate splits:
##       TORD    < 14.4    to the right, agree=0.864, adj=0.2, (0 split)
##       BARTHAG < 0.96115 to the left,  agree=0.847, adj=0.1, (0 split)
## 
## Node number 6: 75 observations,    complexity param=0.01586788
##   mean=6.013333, MSE=1.079822 
##   left son=12 (21 obs) right son=13 (54 obs)
##   Primary splits:
##       TOR     < 18.35   to the right, improve=0.08630201, (0 missing)
##       BARTHAG < 0.8982  to the right, improve=0.07452509, (0 missing)
##       FTRD    < 27.7    to the left,  improve=0.07249690, (0 missing)
##       3P_O    < 34      to the left,  improve=0.06128509, (0 missing)
##       ORB     < 32.25   to the right, improve=0.05936082, (0 missing)
##   Surrogate splits:
##       ADJOE < 106.85  to the left,  agree=0.760, adj=0.143, (0 split)
##       ADJDE < 91.45   to the left,  agree=0.760, adj=0.143, (0 split)
##       3P_O  < 31.65   to the left,  agree=0.760, adj=0.143, (0 split)
##       EFG_O < 46.45   to the left,  agree=0.747, adj=0.095, (0 split)
##       EFG_D < 44.5    to the left,  agree=0.733, adj=0.048, (0 split)
## 
## Node number 7: 122 observations
##   mean=6.942623, MSE=0.3819538 
## 
## Node number 10: 49 observations,    complexity param=0.02444941
##   mean=4.836735, MSE=1.810079 
##   left son=20 (18 obs) right son=21 (31 obs)
##   Primary splits:
##       FTR     < 33.8    to the left,  improve=0.14402940, (0 missing)
##       BARTHAG < 0.95195 to the right, improve=0.13939320, (0 missing)
##       FTRD    < 36.05   to the left,  improve=0.09987490, (0 missing)
##       ADJ_T   < 63.85   to the right, improve=0.09587360, (0 missing)
##       3P_O    < 34.65   to the left,  improve=0.08835711, (0 missing)
##   Surrogate splits:
##       3P_D  < 33.1    to the right, agree=0.755, adj=0.333, (0 split)
##       EFG_D < 47.1    to the right, agree=0.735, adj=0.278, (0 split)
##       FTRD  < 28.05   to the left,  agree=0.735, adj=0.278, (0 split)
##       ADJOE < 119.15  to the right, agree=0.714, adj=0.222, (0 split)
##       TOR   < 15.5    to the left,  agree=0.694, adj=0.167, (0 split)
## 
## Node number 11: 10 observations
##   mean=6, MSE=0.6 
## 
## Node number 12: 21 observations,    complexity param=0.01586788
##   mean=5.52381, MSE=2.154195 
##   left son=24 (14 obs) right son=25 (7 obs)
##   Primary splits:
##       BARTHAG < 0.8593  to the right, improve=0.1900000, (0 missing)
##       ADJDE   < 95.5    to the left,  improve=0.1698246, (0 missing)
##       3P_O    < 34.05   to the left,  improve=0.1403509, (0 missing)
##       DRB     < 27.55   to the left,  improve=0.1202530, (0 missing)
##       WAB     < 2.45    to the left,  improve=0.1202530, (0 missing)
##   Surrogate splits:
##       ORB   < 32.5    to the right, agree=0.857, adj=0.571, (0 split)
##       3P_O  < 36.25   to the left,  agree=0.810, adj=0.429, (0 split)
##       ADJDE < 95.05   to the left,  agree=0.762, adj=0.286, (0 split)
##       TORD  < 19.45   to the right, agree=0.762, adj=0.286, (0 split)
##       DRB   < 26.3    to the right, agree=0.762, adj=0.286, (0 split)
## 
## Node number 13: 54 observations
##   mean=6.203704, MSE=0.5325789 
## 
## Node number 20: 18 observations
##   mean=4.166667, MSE=1.472222 
## 
## Node number 21: 31 observations,    complexity param=0.02303258
##   mean=5.225806, MSE=1.594173 
##   left son=42 (23 obs) right son=43 (8 obs)
##   Primary splits:
##       FTRD  < 36.05   to the left,  improve=0.22887100, (0 missing)
##       2P_O  < 50.15   to the left,  improve=0.11628510, (0 missing)
##       ADJDE < 91.75   to the left,  improve=0.10005320, (0 missing)
##       ADJOE < 118.65  to the left,  improve=0.08675825, (0 missing)
##       EFG_O < 53.4    to the left,  improve=0.08234334, (0 missing)
##   Surrogate splits:
##       ADJOE < 122.75  to the left,  agree=0.806, adj=0.25, (0 split)
##       ADJDE < 97.6    to the left,  agree=0.806, adj=0.25, (0 split)
##       ORB   < 39.9    to the left,  agree=0.806, adj=0.25, (0 split)
##       FTR   < 43.85   to the left,  agree=0.806, adj=0.25, (0 split)
##       2P_D  < 49.7    to the left,  agree=0.806, adj=0.25, (0 split)
## 
## Node number 24: 14 observations
##   mean=5.071429, MSE=1.637755 
## 
## Node number 25: 7 observations
##   mean=6.428571, MSE=1.959184 
## 
## Node number 42: 23 observations,    complexity param=0.01384799
##   mean=4.869565, MSE=1.504726 
##   left son=84 (8 obs) right son=85 (15 obs)
##   Primary splits:
##       DRB   < 29.9    to the right, improve=0.1964929, (0 missing)
##       WAB   < 6.05    to the left,  improve=0.1535468, (0 missing)
##       SEED  < 3.5     to the right, improve=0.1535468, (0 missing)
##       EFG_O < 54      to the left,  improve=0.1438346, (0 missing)
##       FTRD  < 31.6    to the right, improve=0.1438346, (0 missing)
##   Surrogate splits:
##       2P_O  < 48.75   to the left,  agree=0.826, adj=0.500, (0 split)
##       EFG_O < 50      to the left,  agree=0.783, adj=0.375, (0 split)
##       TORD  < 19.4    to the right, agree=0.783, adj=0.375, (0 split)
##       FTRD  < 32.6    to the right, agree=0.783, adj=0.375, (0 split)
##       ADJOE < 110.4   to the left,  agree=0.739, adj=0.250, (0 split)
## 
## Node number 43: 8 observations
##   mean=6.25, MSE=0.4375 
## 
## Node number 84: 8 observations
##   mean=4.125, MSE=2.859375 
## 
## Node number 85: 15 observations
##   mean=5.266667, MSE=0.3288889
# Draw the full (unpruned) regression tree on a single-panel device and
# label every node (all = TRUE) with its fitted value and observation
# count (use.n = TRUE).
par(mfrow = c(1, 1))
plot(
  tree.bballsubset,
  uniform = TRUE,
  margin = 0.2,
  main = "Regression Tree for College Basketball"
)
text(tree.bballsubset, use.n = TRUE, all = TRUE, cex = 0.7)

# Switch to a 1 x 2 panel layout for the rsq.rpart() diagnostic plots;
# rsq.rpart() also prints the CP table shown below.
par(mfrow = c(1, 2))
rsq.rpart(tree.bballsubset)
## 
## Regression tree:
## rpart(formula = POSTSEASON ~ ., data = bballsubset, subset = train, 
##     method = "anova")
## 
## Variables actually used in tree construction:
## [1] BARTHAG DRB     FTR     FTRD    TOR     WAB    
## 
## Root node error: 491.07/263 = 1.8672
## 
## n=263 (964 observations deleted due to missingness)
## 
##         CP nsplit rel error  xerror     xstd
## 1 0.326523      0   1.00000 1.00559 0.131818
## 2 0.081679      1   0.67348 0.75021 0.094499
## 3 0.068563      2   0.59180 0.74262 0.090661
## 4 0.024449      3   0.52323 0.66329 0.079846
## 5 0.023033      5   0.47434 0.74179 0.084873
## 6 0.015868      6   0.45130 0.76698 0.087547
## 7 0.013848      8   0.41957 0.78231 0.084533
## 8 0.010000      9   0.40572 0.79476 0.083751

# Complexity-parameter table of the fitted tree: one row per candidate
# subtree, with its cross-validated error (xerror) and standard error (xstd).
cp.table <- tree.bballsubset$cptable
cp.table
##           CP nsplit rel error    xerror       xstd
## 1 0.32652326      0 1.0000000 1.0055888 0.13181827
## 2 0.08167923      1 0.6734767 0.7502098 0.09449891
## 3 0.06856329      2 0.5917975 0.7426239 0.09066088
## 4 0.02444941      3 0.5232342 0.6632891 0.07984608
## 5 0.02303258      5 0.4743354 0.7417943 0.08487301
## 6 0.01586788      6 0.4513028 0.7669824 0.08754729
## 7 0.01384799      8 0.4195671 0.7823116 0.08453252
## 8 0.01000000      9 0.4057191 0.7947551 0.08375125
# Row of the table with the smallest cross-validated error.
best.row <- which.min(cp.table[, "xerror"])
best.row
## 4 
## 4
# CP value belonging to that row.
cp.table[best.row, "CP"]
## [1] 0.02444941
# Prune back to the subtree with the lowest cross-validated error. The CP is
# read from the fitted tree's own cptable instead of being typed in by hand,
# so the pruning stays in step with the table if the data or seed changes.
# (The previously hard-coded cp = 0.0141 did not correspond to the
# minimum-xerror row, CP = 0.02444941; it kept the 8-split tree, whose
# xerror of 0.78 is worse than the 3-split tree's 0.66.)
cp.table <- tree.bballsubset$cptable
best.cp <- cp.table[which.min(cp.table[, "xerror"]), "CP"]
pfit <- prune(tree.bballsubset, cp = best.cp)


plot(pfit, uniform = TRUE, margin = 0.2,
     main = "Pruned Tree For College Basketball")
text(pfit, use.n = TRUE, all = TRUE, cex = 0.7)

# Predictions for the held-out rows; these still come from the unpruned
# tree, exactly as before.
yhat <- predict(tree.bballsubset, newdata = bballsubset[-train, ])
# Extract the held-out response with [[ ]] so it is a plain vector whether
# bballsubset is a data.frame or a tibble (a tibble's [rows, "col"] would
# stay a one-column tibble and break the plot/arithmetic below).
# NOTE(review): bballsubset is created outside this section -- presumably a
# tibble derived from bball; confirm.
bballsubset.test <- bballsubset[["POSTSEASON"]][-train]
#plot(yhat, bballsubset.test)
#abline(0,1)

Variable selection

# Model 1: simple linear regression of BARTHAG on adjusted offensive
# efficiency (ADJOE) alone.
m1 <- lm(BARTHAG ~ ADJOE, data = bball)
summary(m1)
## 
## Call:
## lm(formula = BARTHAG ~ ADJOE, data = bball)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40632 -0.09329 -0.00114  0.09160  0.42278 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.5934725  0.0370158  -70.06   <2e-16 ***
## ADJOE        0.0298867  0.0003574   83.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1306 on 2453 degrees of freedom
## Multiple R-squared:  0.7403, Adjusted R-squared:  0.7402 
## F-statistic:  6992 on 1 and 2453 DF,  p-value: < 2.2e-16
# Show the first six rows of bball to review the available columns.
head(bball, n = 6L)
## # A tibble: 6 × 25
##   TEAM       CONF      G     W ADJOE ADJDE BARTHAG EFG_O EFG_D   TOR  TORD   ORB
##   <chr>      <chr> <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 North Car… ACC      40    33  123.  94.9   0.953  52.6  48.1  15.4  18.2  40.7
## 2 Wisconsin  B10      40    36  129.  93.6   0.976  54.8  47.7  12.4  15.8  32.1
## 3 Michigan   B10      40    33  114.  90.4   0.938  53.9  47.7  14    19.5  25.5
## 4 Texas Tech B12      38    31  115.  85.2   0.970  53.5  43    17.7  22.8  27.4
## 5 Gonzaga    WCC      39    37  118.  86.3   0.973  56.6  41.1  16.2  17.1  30  
## 6 Kentucky   SEC      40    29  117.  96.2   0.906  49.9  46    18.1  16.1  42  
## # … with 13 more variables: DRB <dbl>, FTR <dbl>, FTRD <dbl>, `2P_O` <dbl>,
## #   `2P_D` <dbl>, `3P_O` <dbl>, `3P_D` <dbl>, ADJ_T <dbl>, WAB <dbl>,
## #   POSTSEASON <fct>, SEED <dbl>, YEAR <dbl>, WP <dbl>
# Model 2: add `2P_O` to ADJOE. The column name starts with a digit, so it
# must be back-quoted inside the formula.
m2 <- lm(BARTHAG ~ ADJOE + `2P_O`, data = bball)
summary(m2)
## 
## Call:
## lm(formula = BARTHAG ~ ADJOE + `2P_O`, data = bball)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40663 -0.09271 -0.00309  0.08871  0.44927 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.4668580  0.0408737 -60.353  < 2e-16 ***
## ADJOE        0.0320469  0.0004695  68.260  < 2e-16 ***
## `2P_O`      -0.0071671  0.0010233  -7.004  3.2e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1294 on 2452 degrees of freedom
## Multiple R-squared:  0.7454, Adjusted R-squared:  0.7452 
## F-statistic:  3589 on 2 and 2452 DF,  p-value: < 2.2e-16
# Model 3: ADJOE plus all four 2-point / 3-point shooting columns.
m3 <- lm(
  BARTHAG ~ ADJOE + `2P_O` + `2P_D` + `3P_D` + `3P_O`,
  data = bball
)
summary(m3)
## 
## Call:
## lm(formula = BARTHAG ~ ADJOE + `2P_O` + `2P_D` + `3P_D` + `3P_O`, 
##     data = bball)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33149 -0.05572  0.00141  0.05642  0.29583 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.3648547  0.0459268  -7.944 2.95e-15 ***
## ADJOE        0.0284354  0.0003645  78.022  < 2e-16 ***
## `2P_O`      -0.0027316  0.0006781  -4.029 5.78e-05 ***
## `2P_D`      -0.0214161  0.0005859 -36.551  < 2e-16 ***
## `3P_D`      -0.0183048  0.0007765 -23.573  < 2e-16 ***
## `3P_O`      -0.0076490  0.0007631 -10.024  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08478 on 2449 degrees of freedom
## Multiple R-squared:  0.8907, Adjusted R-squared:  0.8905 
## F-statistic:  3993 on 5 and 2449 DF,  p-value: < 2.2e-16
# Model 4: the shooting columns and ADJOE from model 3, extended with DRB,
# TORD, TOR and ADJDE. Term order is kept so the coefficient table is
# unchanged.
m4 <- lm(
  BARTHAG ~ `2P_O` + `2P_D` + `3P_D` + `3P_O` + ADJOE +
    DRB + TORD + TOR + ADJDE,
  data = bball
)
summary(m4)
## 
## Call:
## lm(formula = BARTHAG ~ `2P_O` + `2P_D` + `3P_D` + `3P_O` + ADJOE + 
##     DRB + TORD + TOR + ADJDE, data = bball)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27477 -0.02754 -0.00095  0.03243  0.43716 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.6393643  0.0400709  15.956  < 2e-16 ***
## `2P_O`       0.0002176  0.0003680   0.591  0.55432    
## `2P_D`       0.0006351  0.0004409   1.440  0.14992    
## `3P_D`      -0.0001402  0.0004666  -0.300  0.76390    
## `3P_O`      -0.0005314  0.0004051  -1.312  0.18979    
## ADJOE        0.0205081  0.0002685  76.368  < 2e-16 ***
## DRB          0.0013435  0.0003744   3.588  0.00034 ***
## TORD        -0.0024315  0.0005465  -4.449 9.01e-06 ***
## TOR         -0.0001412  0.0005527  -0.255  0.79843    
## ADJDE       -0.0220212  0.0003454 -63.751  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04276 on 2445 degrees of freedom
## Multiple R-squared:  0.9723, Adjusted R-squared:  0.9722 
## F-statistic:  9521 on 9 and 2445 DF,  p-value: < 2.2e-16

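Here more of the variables start to lose significance: in m4 the 2-point and 3-point shooting terms and TOR are no longer statistically significant (all p > 0.1).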

# Model 5: reduced model using only the two adjusted efficiency columns,
# ADJOE and ADJDE.
m5 <- lm(BARTHAG ~ ADJOE + ADJDE, data = bball)
summary(m5)
## 
## Call:
## lm(formula = BARTHAG ~ ADJOE + ADJDE, data = bball)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27244 -0.02821 -0.00098  0.03336  0.44834 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.5654764  0.0253317   22.32   <2e-16 ***
## ADJOE        0.0206423  0.0001343  153.66   <2e-16 ***
## ADJDE       -0.0213346  0.0001500 -142.21   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04296 on 2452 degrees of freedom
## Multiple R-squared:  0.9719, Adjusted R-squared:  0.9719 
## F-statistic: 4.243e+04 on 2 and 2452 DF,  p-value: < 2.2e-16

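On their own, either the adjusted offensive (ADJOE) and defensive (ADJDE) efficiencies or the 2-point and 3-point shooting stats are good predictors of the chance of beating an average D1 basketball team (BARTHAG). I would choose the efficiencies, however, because they are more telling than the 2-point and 3-point stats alone: with only these two predictors, m5 reaches an adjusted R-squared of 0.9719, essentially matching the nine-predictor m4 (0.9722).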

# Hold-out evaluation of the two-predictor model BARTHAG ~ ADJOE + ADJDE.

# Randomly assign half of the rows of bball to the training set.
train <- sample(seq_len(nrow(bball)), floor(nrow(bball) / 2))

# Held-out response as a plain numeric vector. bball is a tibble, so
# bball[-train, "BARTHAG"] stays a one-column tibble; subtracting it from the
# predictions yields a data frame, and mean() then returns NA with the
# warning "argument is not numeric or logical". [[ ]] extracts the column
# itself.
bball.test <- bball[["BARTHAG"]][-train]

# Refit the m5 specification on the training rows only. m5 itself was fitted
# on every row of bball, so scoring it on the "test" rows would report an
# in-sample error rather than a genuine hold-out error.
m5.train <- lm(BARTHAG ~ ADJOE + ADJDE, data = bball[train, ])
yhat.bball <- predict(m5.train, newdata = bball[-train, ])
#plot(yhat.bball, bball.test)
#abline(0,1)

# Test-set mean squared error.
mean((yhat.bball - bball.test)^2)
## (re-run to regenerate this output; the previous run printed NA with a warning)