## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Rows: 2455 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TEAM, CONF, POSTSEASON
## dbl (21): G, W, ADJOE, ADJDE, BARTHAG, EFG_O, EFG_D, TOR, TORD, ORB, DRB, FT...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 × 25
## TEAM CONF G W ADJOE ADJDE BARTHAG EFG_O EFG_D TOR TORD ORB
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 North Car… ACC 40 33 123. 94.9 0.953 52.6 48.1 15.4 18.2 40.7
## 2 Wisconsin B10 40 36 129. 93.6 0.976 54.8 47.7 12.4 15.8 32.1
## 3 Michigan B10 40 33 114. 90.4 0.938 53.9 47.7 14 19.5 25.5
## 4 Texas Tech B12 38 31 115. 85.2 0.970 53.5 43 17.7 22.8 27.4
## 5 Gonzaga WCC 39 37 118. 86.3 0.973 56.6 41.1 16.2 17.1 30
## 6 Kentucky SEC 40 29 117. 96.2 0.906 49.9 46 18.1 16.1 42
## # … with 13 more variables: DRB <dbl>, FTR <dbl>, FTRD <dbl>, `2P_O` <dbl>,
## # `2P_D` <dbl>, `3P_O` <dbl>, `3P_D` <dbl>, ADJ_T <dbl>, WAB <dbl>,
## # POSTSEASON <fct>, SEED <dbl>, YEAR <dbl>, WP <dbl>
## # A tibble: 6 × 25
## TEAM CONF G W ADJOE ADJDE BARTHAG EFG_O EFG_D TOR TORD ORB
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 North Car… ACC 40 33 123. 94.9 0.953 52.6 48.1 15.4 18.2 40.7
## 2 Wisconsin B10 40 36 129. 93.6 0.976 54.8 47.7 12.4 15.8 32.1
## 3 Michigan B10 40 33 114. 90.4 0.938 53.9 47.7 14 19.5 25.5
## 4 Texas Tech B12 38 31 115. 85.2 0.970 53.5 43 17.7 22.8 27.4
## 5 Gonzaga WCC 39 37 118. 86.3 0.973 56.6 41.1 16.2 17.1 30
## 6 Kentucky SEC 40 29 117. 96.2 0.906 49.9 46 18.1 16.1 42
## # … with 13 more variables: DRB <dbl>, FTR <dbl>, FTRD <dbl>, `2P_O` <dbl>,
## # `2P_D` <dbl>, `3P_O` <dbl>, `3P_D` <dbl>, ADJ_T <dbl>, WAB <dbl>,
## # POSTSEASON <fct>, SEED <dbl>, YEAR <dbl>, WP <dbl>
#install.packages("ISLR")
library(ISLR)
#install.packages("rpart")
library(rpart)
#install.packages("MASS")
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Fix the RNG seed so the train/test split (and rpart's cross-validation)
# can be reproduced.
set.seed(1)

# Randomly pick half of the row indices (rounded down) as the training set.
train <- sample(
  seq_len(nrow(bballsubset)),
  floor(nrow(bballsubset) / 2)
)

# Grow a regression tree (method = "anova") for POSTSEASON using every other
# column as a candidate predictor, fitted on the training rows only, then
# print the node-by-node summary.
tree.bballsubset <- rpart(
  POSTSEASON ~ .,
  data = bballsubset,
  subset = train,
  method = "anova"
)
summary(tree.bballsubset)
## Call:
## rpart(formula = POSTSEASON ~ ., data = bballsubset, subset = train,
## method = "anova")
## n=263 (964 observations deleted due to missingness)
##
## CP nsplit rel error xerror xstd
## 1 0.32652326 0 1.0000000 1.0055888 0.13181827
## 2 0.08167923 1 0.6734767 0.7502098 0.09449891
## 3 0.06856329 2 0.5917975 0.7426239 0.09066088
## 4 0.02444941 3 0.5232342 0.6632891 0.07984608
## 5 0.02303258 5 0.4743354 0.7417943 0.08487301
## 6 0.01586788 6 0.4513028 0.7669824 0.08754729
## 7 0.01384799 8 0.4195671 0.7823116 0.08453252
## 8 0.01000000 9 0.4057191 0.7947551 0.08375125
##
## Variable importance
## BARTHAG WAB SEED ADJOE ADJDE EFG_D DRB FTRD FTR ORB
## 30 18 15 11 7 5 3 2 2 2
## TOR TORD 3P_O 3P_D
## 1 1 1 1
##
## Node number 1: 263 observations, complexity param=0.3265233
## mean=6.136882, MSE=1.867195
## left son=2 (66 obs) right son=3 (197 obs)
## Primary splits:
## BARTHAG < 0.9158 to the right, improve=0.3265233, (0 missing)
## WAB < 1.55 to the right, improve=0.2996935, (0 missing)
## SEED < 4.5 to the left, improve=0.2908791, (0 missing)
## ADJOE < 115.65 to the right, improve=0.1891627, (0 missing)
## ADJDE < 95.5 to the left, improve=0.1842826, (0 missing)
## Surrogate splits:
## WAB < 4.55 to the right, agree=0.897, adj=0.591, (0 split)
## SEED < 4.5 to the left, agree=0.894, adj=0.576, (0 split)
## ADJOE < 117.05 to the right, agree=0.852, adj=0.409, (0 split)
## ADJDE < 92.55 to the left, agree=0.810, adj=0.242, (0 split)
## EFG_D < 44.95 to the left, agree=0.802, adj=0.212, (0 split)
##
## Node number 2: 66 observations, complexity param=0.06856329
## mean=4.787879, MSE=2.470156
## left son=4 (7 obs) right son=5 (59 obs)
## Primary splits:
## BARTHAG < 0.9655 to the right, improve=0.20652310, (0 missing)
## SEED < 1.5 to the left, improve=0.08718610, (0 missing)
## 3P_D < 29.95 to the left, improve=0.07107393, (0 missing)
## 3P_O < 34.2 to the left, improve=0.07107393, (0 missing)
## DRB < 24.35 to the right, improve=0.07056896, (0 missing)
## Surrogate splits:
## WAB < 11.25 to the right, agree=0.909, adj=0.143, (0 split)
##
## Node number 3: 197 observations, complexity param=0.08167923
## mean=6.588832, MSE=0.8512458
## left son=6 (75 obs) right son=7 (122 obs)
## Primary splits:
## WAB < 1.55 to the right, improve=0.23918600, (0 missing)
## BARTHAG < 0.86385 to the right, improve=0.17838540, (0 missing)
## SEED < 9.5 to the left, improve=0.15442470, (0 missing)
## ADJDE < 99 to the left, improve=0.09321217, (0 missing)
## ADJOE < 109.65 to the right, improve=0.08905300, (0 missing)
## Surrogate splits:
## SEED < 8.5 to the left, agree=0.858, adj=0.627, (0 split)
## BARTHAG < 0.84275 to the right, agree=0.843, adj=0.587, (0 split)
## ADJOE < 111.15 to the right, agree=0.751, adj=0.347, (0 split)
## ADJDE < 93.5 to the left, agree=0.701, adj=0.213, (0 split)
## ORB < 35.5 to the right, agree=0.660, adj=0.107, (0 split)
##
## Node number 4: 7 observations
## mean=2.714286, MSE=3.346939
##
## Node number 5: 59 observations, complexity param=0.02444941
## mean=5.033898, MSE=1.795461
## left son=10 (49 obs) right son=11 (10 obs)
## Primary splits:
## DRB < 25.7 to the right, improve=0.10608980, (0 missing)
## ADJ_T < 63.85 to the right, improve=0.09357120, (0 missing)
## FTR < 34.75 to the left, improve=0.09161751, (0 missing)
## 2P_O < 48.65 to the left, improve=0.08014462, (0 missing)
## 3P_O < 34.65 to the left, improve=0.07217294, (0 missing)
## Surrogate splits:
## TORD < 14.4 to the right, agree=0.864, adj=0.2, (0 split)
## BARTHAG < 0.96115 to the left, agree=0.847, adj=0.1, (0 split)
##
## Node number 6: 75 observations, complexity param=0.01586788
## mean=6.013333, MSE=1.079822
## left son=12 (21 obs) right son=13 (54 obs)
## Primary splits:
## TOR < 18.35 to the right, improve=0.08630201, (0 missing)
## BARTHAG < 0.8982 to the right, improve=0.07452509, (0 missing)
## FTRD < 27.7 to the left, improve=0.07249690, (0 missing)
## 3P_O < 34 to the left, improve=0.06128509, (0 missing)
## ORB < 32.25 to the right, improve=0.05936082, (0 missing)
## Surrogate splits:
## ADJOE < 106.85 to the left, agree=0.760, adj=0.143, (0 split)
## ADJDE < 91.45 to the left, agree=0.760, adj=0.143, (0 split)
## 3P_O < 31.65 to the left, agree=0.760, adj=0.143, (0 split)
## EFG_O < 46.45 to the left, agree=0.747, adj=0.095, (0 split)
## EFG_D < 44.5 to the left, agree=0.733, adj=0.048, (0 split)
##
## Node number 7: 122 observations
## mean=6.942623, MSE=0.3819538
##
## Node number 10: 49 observations, complexity param=0.02444941
## mean=4.836735, MSE=1.810079
## left son=20 (18 obs) right son=21 (31 obs)
## Primary splits:
## FTR < 33.8 to the left, improve=0.14402940, (0 missing)
## BARTHAG < 0.95195 to the right, improve=0.13939320, (0 missing)
## FTRD < 36.05 to the left, improve=0.09987490, (0 missing)
## ADJ_T < 63.85 to the right, improve=0.09587360, (0 missing)
## 3P_O < 34.65 to the left, improve=0.08835711, (0 missing)
## Surrogate splits:
## 3P_D < 33.1 to the right, agree=0.755, adj=0.333, (0 split)
## EFG_D < 47.1 to the right, agree=0.735, adj=0.278, (0 split)
## FTRD < 28.05 to the left, agree=0.735, adj=0.278, (0 split)
## ADJOE < 119.15 to the right, agree=0.714, adj=0.222, (0 split)
## TOR < 15.5 to the left, agree=0.694, adj=0.167, (0 split)
##
## Node number 11: 10 observations
## mean=6, MSE=0.6
##
## Node number 12: 21 observations, complexity param=0.01586788
## mean=5.52381, MSE=2.154195
## left son=24 (14 obs) right son=25 (7 obs)
## Primary splits:
## BARTHAG < 0.8593 to the right, improve=0.1900000, (0 missing)
## ADJDE < 95.5 to the left, improve=0.1698246, (0 missing)
## 3P_O < 34.05 to the left, improve=0.1403509, (0 missing)
## DRB < 27.55 to the left, improve=0.1202530, (0 missing)
## WAB < 2.45 to the left, improve=0.1202530, (0 missing)
## Surrogate splits:
## ORB < 32.5 to the right, agree=0.857, adj=0.571, (0 split)
## 3P_O < 36.25 to the left, agree=0.810, adj=0.429, (0 split)
## ADJDE < 95.05 to the left, agree=0.762, adj=0.286, (0 split)
## TORD < 19.45 to the right, agree=0.762, adj=0.286, (0 split)
## DRB < 26.3 to the right, agree=0.762, adj=0.286, (0 split)
##
## Node number 13: 54 observations
## mean=6.203704, MSE=0.5325789
##
## Node number 20: 18 observations
## mean=4.166667, MSE=1.472222
##
## Node number 21: 31 observations, complexity param=0.02303258
## mean=5.225806, MSE=1.594173
## left son=42 (23 obs) right son=43 (8 obs)
## Primary splits:
## FTRD < 36.05 to the left, improve=0.22887100, (0 missing)
## 2P_O < 50.15 to the left, improve=0.11628510, (0 missing)
## ADJDE < 91.75 to the left, improve=0.10005320, (0 missing)
## ADJOE < 118.65 to the left, improve=0.08675825, (0 missing)
## EFG_O < 53.4 to the left, improve=0.08234334, (0 missing)
## Surrogate splits:
## ADJOE < 122.75 to the left, agree=0.806, adj=0.25, (0 split)
## ADJDE < 97.6 to the left, agree=0.806, adj=0.25, (0 split)
## ORB < 39.9 to the left, agree=0.806, adj=0.25, (0 split)
## FTR < 43.85 to the left, agree=0.806, adj=0.25, (0 split)
## 2P_D < 49.7 to the left, agree=0.806, adj=0.25, (0 split)
##
## Node number 24: 14 observations
## mean=5.071429, MSE=1.637755
##
## Node number 25: 7 observations
## mean=6.428571, MSE=1.959184
##
## Node number 42: 23 observations, complexity param=0.01384799
## mean=4.869565, MSE=1.504726
## left son=84 (8 obs) right son=85 (15 obs)
## Primary splits:
## DRB < 29.9 to the right, improve=0.1964929, (0 missing)
## WAB < 6.05 to the left, improve=0.1535468, (0 missing)
## SEED < 3.5 to the right, improve=0.1535468, (0 missing)
## EFG_O < 54 to the left, improve=0.1438346, (0 missing)
## FTRD < 31.6 to the right, improve=0.1438346, (0 missing)
## Surrogate splits:
## 2P_O < 48.75 to the left, agree=0.826, adj=0.500, (0 split)
## EFG_O < 50 to the left, agree=0.783, adj=0.375, (0 split)
## TORD < 19.4 to the right, agree=0.783, adj=0.375, (0 split)
## FTRD < 32.6 to the right, agree=0.783, adj=0.375, (0 split)
## ADJOE < 110.4 to the left, agree=0.739, adj=0.250, (0 split)
##
## Node number 43: 8 observations
## mean=6.25, MSE=0.4375
##
## Node number 84: 8 observations
## mean=4.125, MSE=2.859375
##
## Node number 85: 15 observations
## mean=5.266667, MSE=0.3288889
# Draw the unpruned tree on a single-panel device.
par(mfrow = c(1, 1))
plot(
  tree.bballsubset,
  uniform = TRUE,
  margin = 0.2,
  main = "Regression Tree for College Basketball"
)
# Label all nodes (all = TRUE) and show observation counts (use.n = TRUE).
text(tree.bballsubset, use.n = TRUE, all = TRUE, cex = 0.7)

# Switch to a 1 x 2 layout for the plots drawn by rsq.rpart(), which also
# prints the complexity-parameter table.
par(mfrow = c(1, 2))
rsq.rpart(tree.bballsubset)
##
## Regression tree:
## rpart(formula = POSTSEASON ~ ., data = bballsubset, subset = train,
## method = "anova")
##
## Variables actually used in tree construction:
## [1] BARTHAG DRB FTR FTRD TOR WAB
##
## Root node error: 491.07/263 = 1.8672
##
## n=263 (964 observations deleted due to missingness)
##
## CP nsplit rel error xerror xstd
## 1 0.326523 0 1.00000 1.00559 0.131818
## 2 0.081679 1 0.67348 0.75021 0.094499
## 3 0.068563 2 0.59180 0.74262 0.090661
## 4 0.024449 3 0.52323 0.66329 0.079846
## 5 0.023033 5 0.47434 0.74179 0.084873
## 6 0.015868 6 0.45130 0.76698 0.087547
## 7 0.013848 8 0.41957 0.78231 0.084533
## 8 0.010000 9 0.40572 0.79476 0.083751

# Complexity-parameter table of the unpruned tree: one row per candidate
# subtree, with its cross-validated error (xerror).
tree.bballsubset$cptable
## CP nsplit rel error xerror xstd
## 1 0.32652326 0 1.0000000 1.0055888 0.13181827
## 2 0.08167923 1 0.6734767 0.7502098 0.09449891
## 3 0.06856329 2 0.5917975 0.7426239 0.09066088
## 4 0.02444941 3 0.5232342 0.6632891 0.07984608
## 5 0.02303258 5 0.4743354 0.7417943 0.08487301
## 6 0.01586788 6 0.4513028 0.7669824 0.08754729
## 7 0.01384799 8 0.4195671 0.7823116 0.08453252
## 8 0.01000000 9 0.4057191 0.7947551 0.08375125
# Row of the table with the smallest cross-validated error ...
which.min(tree.bballsubset$cptable[,"xerror"])
## 4
## 4
# ... and the complexity parameter stored in that row.
tree.bballsubset$cptable[which.min(tree.bballsubset$cptable[,"xerror"]),"CP"]
## [1] 0.02444941
# Prune at the CP that minimises xerror instead of a hard-coded threshold.
# A fixed cp = 0.0141 ignores the value computed above and keeps the 8-split
# subtree (xerror 0.782) rather than the 3-split subtree with the lowest
# cross-validated error (0.663).
best_cp <- tree.bballsubset$cptable[
  which.min(tree.bballsubset$cptable[, "xerror"]),
  "CP"
]
pfit <- prune(tree.bballsubset, cp = best_cp)
plot(pfit, uniform = TRUE, margin = 0.2,
     main = "Pruned Tree For College Basketball")
text(pfit, use.n = TRUE, all = TRUE, cex = 0.7)

# Predict POSTSEASON for the held-out rows (those not in `train`).
# NOTE(review): predictions come from the unpruned tree; use `pfit` here if
# the pruned model is the one being evaluated.
yhat <- predict(tree.bballsubset, newdata = bballsubset[-train, ])
# Extract the held-out response as a plain vector. `[[` is used because
# `x[rows, "col"]` returns a one-column tibble when `x` is a tibble, which
# breaks arithmetic against `yhat`; `[[` gives a vector for both tibbles and
# base data frames.
bballsubset.test <- bballsubset[["POSTSEASON"]][-train]
#plot(yhat, bballsubset.test)
#abline(0,1)
Variable selection
# Model 1: BARTHAG regressed on adjusted offensive efficiency (ADJOE) alone.
m1 <- lm(BARTHAG ~ ADJOE, data = bball)
summary(m1)
##
## Call:
## lm(formula = BARTHAG ~ ADJOE, data = bball)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40632 -0.09329 -0.00114 0.09160 0.42278
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.5934725 0.0370158 -70.06 <2e-16 ***
## ADJOE 0.0298867 0.0003574 83.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1306 on 2453 degrees of freedom
## Multiple R-squared: 0.7403, Adjusted R-squared: 0.7402
## F-statistic: 6992 on 1 and 2453 DF, p-value: < 2.2e-16
# Peek at the first six rows to see which columns are available.
head(bball, n = 6L)
## # A tibble: 6 × 25
## TEAM CONF G W ADJOE ADJDE BARTHAG EFG_O EFG_D TOR TORD ORB
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 North Car… ACC 40 33 123. 94.9 0.953 52.6 48.1 15.4 18.2 40.7
## 2 Wisconsin B10 40 36 129. 93.6 0.976 54.8 47.7 12.4 15.8 32.1
## 3 Michigan B10 40 33 114. 90.4 0.938 53.9 47.7 14 19.5 25.5
## 4 Texas Tech B12 38 31 115. 85.2 0.970 53.5 43 17.7 22.8 27.4
## 5 Gonzaga WCC 39 37 118. 86.3 0.973 56.6 41.1 16.2 17.1 30
## 6 Kentucky SEC 40 29 117. 96.2 0.906 49.9 46 18.1 16.1 42
## # … with 13 more variables: DRB <dbl>, FTR <dbl>, FTRD <dbl>, `2P_O` <dbl>,
## # `2P_D` <dbl>, `3P_O` <dbl>, `3P_D` <dbl>, ADJ_T <dbl>, WAB <dbl>,
## # POSTSEASON <fct>, SEED <dbl>, YEAR <dbl>, WP <dbl>
# Model 2: add the two-point shooting column (`2P_O`) to ADJOE.
m2 <- lm(BARTHAG ~ ADJOE + `2P_O`, data = bball)
summary(m2)
##
## Call:
## lm(formula = BARTHAG ~ ADJOE + `2P_O`, data = bball)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40663 -0.09271 -0.00309 0.08871 0.44927
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.4668580 0.0408737 -60.353 < 2e-16 ***
## ADJOE 0.0320469 0.0004695 68.260 < 2e-16 ***
## `2P_O` -0.0071671 0.0010233 -7.004 3.2e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1294 on 2452 degrees of freedom
## Multiple R-squared: 0.7454, Adjusted R-squared: 0.7452
## F-statistic: 3589 on 2 and 2452 DF, p-value: < 2.2e-16
# Model 3: ADJOE plus all four two-point / three-point shooting columns.
m3 <- lm(
  BARTHAG ~ ADJOE + `2P_O` + `2P_D` + `3P_D` + `3P_O`,
  data = bball
)
summary(m3)
##
## Call:
## lm(formula = BARTHAG ~ ADJOE + `2P_O` + `2P_D` + `3P_D` + `3P_O`,
## data = bball)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33149 -0.05572 0.00141 0.05642 0.29583
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3648547 0.0459268 -7.944 2.95e-15 ***
## ADJOE 0.0284354 0.0003645 78.022 < 2e-16 ***
## `2P_O` -0.0027316 0.0006781 -4.029 5.78e-05 ***
## `2P_D` -0.0214161 0.0005859 -36.551 < 2e-16 ***
## `3P_D` -0.0183048 0.0007765 -23.573 < 2e-16 ***
## `3P_O` -0.0076490 0.0007631 -10.024 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08478 on 2449 degrees of freedom
## Multiple R-squared: 0.8907, Adjusted R-squared: 0.8905
## F-statistic: 3993 on 5 and 2449 DF, p-value: < 2.2e-16
# Model 4: shooting columns, ADJOE, rebounding/turnover columns and adjusted
# defensive efficiency (ADJDE) together. Term order is kept so the
# coefficient table lines up with the printed output.
m4 <- lm(
  BARTHAG ~ `2P_O` + `2P_D` + `3P_D` + `3P_O` + ADJOE +
    DRB + TORD + TOR + ADJDE,
  data = bball
)
summary(m4)
##
## Call:
## lm(formula = BARTHAG ~ `2P_O` + `2P_D` + `3P_D` + `3P_O` + ADJOE +
## DRB + TORD + TOR + ADJDE, data = bball)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27477 -0.02754 -0.00095 0.03243 0.43716
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6393643 0.0400709 15.956 < 2e-16 ***
## `2P_O` 0.0002176 0.0003680 0.591 0.55432
## `2P_D` 0.0006351 0.0004409 1.440 0.14992
## `3P_D` -0.0001402 0.0004666 -0.300 0.76390
## `3P_O` -0.0005314 0.0004051 -1.312 0.18979
## ADJOE 0.0205081 0.0002685 76.368 < 2e-16 ***
## DRB 0.0013435 0.0003744 3.588 0.00034 ***
## TORD -0.0024315 0.0005465 -4.449 9.01e-06 ***
## TOR -0.0001412 0.0005527 -0.255 0.79843
## ADJDE -0.0220212 0.0003454 -63.751 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04276 on 2445 degrees of freedom
## Multiple R-squared: 0.9723, Adjusted R-squared: 0.9722
## F-statistic: 9521 on 9 and 2445 DF, p-value: < 2.2e-16
Here we start to see more variables that are not statistically significant.
# Model 5: reduced model using only the two adjusted efficiency columns.
m5 <- lm(BARTHAG ~ ADJOE + ADJDE, data = bball)
summary(m5)
##
## Call:
## lm(formula = BARTHAG ~ ADJOE + ADJDE, data = bball)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27244 -0.02821 -0.00098 0.03336 0.44834
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5654764 0.0253317 22.32 <2e-16 ***
## ADJOE 0.0206423 0.0001343 153.66 <2e-16 ***
## ADJDE -0.0213346 0.0001500 -142.21 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04296 on 2452 degrees of freedom
## Multiple R-squared: 0.9719, Adjusted R-squared: 0.9719
## F-statistic: 4.243e+04 on 2 and 2452 DF, p-value: < 2.2e-16
On their own, either the adjusted offensive (ADJOE) and defensive (ADJDE)
efficiencies or the 2-point and 3-point shooting stats are good predictors
of the chance of beating an average D1 basketball team. I would use the
efficiencies, however, because they are more telling than the 2- and 3-point
stats alone.
# Hold out a random half of the rows to score m5.
# NOTE(review): m5 was fitted on all rows of bball, including the held-out
# ones, so this is not a true out-of-sample error; refit on bball[train, ]
# if that is what is intended.
train <- sample(seq_len(nrow(bball)), floor(nrow(bball) / 2))
# Extract the held-out response as a numeric vector. bball is a tibble, so
# bball[-train, "BARTHAG"] would be a one-column tibble; subtracting it from
# the predictions yields a data frame and mean() then returns NA with an
# "argument is not numeric or logical" warning.
bball.test <- bball[["BARTHAG"]][-train]
yhat.bball <- predict(m5, newdata = bball[-train, ])
#plot(yhat.bball, bball.test)
#abline(0,1)
# Mean squared error of m5 on the held-out rows.
mean((yhat.bball - bball.test)^2)
## (re-knit to refresh this output; it previously showed NA with a warning)