Load packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(yardstick)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Read the raw player table, keep only the rows that have a Hall of Fame
# label, and store that label as a factor for the classification model.
Basketball_raw <- read.csv(file = "Basketball_raw.csv")
basketball_filtered <- dplyr::filter(Basketball_raw, !is.na(HOF))
basketball_filtered$HOF <- as.factor(basketball_filtered$HOF)
Split the dataset into training and test sets
# Reproducible random split: every row is independently assigned to group 1
# (training, probability 0.7) or group 2 (test, probability 0.3), so the
# realised split is only approximately 70/30.
set.seed(123)
sample <- sample(
  x = 2,
  size = nrow(basketball_filtered),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train <- basketball_filtered[sample == 1, ]
test <- basketball_filtered[sample == 2, ]
Logistic Regression Model
# Fit a single-predictor logistic regression of HOF on MP using the training
# rows; the iteration cap is raised to 1000 via maxit.
# NOTE(review): MP is presumably career minutes played -- confirm against the
# data dictionary.
model <- glm(
  formula = HOF ~ MP,
  family = binomial,
  data = train,
  maxit = 1000
)
summary(model)
##
## Call:
## glm(formula = HOF ~ MP, family = binomial, data = train, maxit = 1000)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.751e+00 6.686e-01 -10.097 < 2e-16 ***
## MP 1.784e-04 2.201e-05 8.104 5.33e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 331.56 on 489 degrees of freedom
## Residual deviance: 229.53 on 488 degrees of freedom
## AIC: 233.53
##
## Number of Fisher Scoring iterations: 6
Make predictions
# Score the test rows with the fitted model (type = "response" returns
# probabilities), drop any row whose prediction is NA from both the test set
# and the prediction vector so they stay aligned, then classify at 0.5.
predictions <- predict(model, newdata = test, type = "response")
has_prediction <- !is.na(predictions)
test <- test[has_prediction, ]
predictions <- predictions[has_prediction]
predicted_classes <- ifelse(predictions > 0.5, 1, 0)
Confusion matrix
# Cross-tabulate predicted class (rows: 0, 1) against the actual HOF label
# (columns). The predicted classes are converted to a factor with both levels
# fixed so the table is always 2 x 2; without this, a model that predicts only
# one class yields a single-row table and later cell lookups fail.
# Naming the first argument keeps the "predicted_classes" header in the
# printed table.
conf_matrix <- table(
  predicted_classes = factor(predicted_classes, levels = c(0, 1)),
  test$HOF
)
print("Confusion Matrix:")
## [1] "Confusion Matrix:"
print(conf_matrix)
##
## predicted_classes No Yes
## 0 191 7
## 1 4 10
Compute performance metrics
# conf_matrix layout: rows are the PREDICTED class (row 1 = 0, row 2 = 1) and
# columns are the ACTUAL label (column 1 = No, column 2 = Yes). Hence
#   [1, 1] = true negatives    [1, 2] = false negatives
#   [2, 1] = false positives   [2, 2] = true positives
# Row sums are totals per predicted class; column sums are totals per actual
# class.

# accuracy: correct predictions over all predictions
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
print(paste("Accuracy:", accuracy))
## [1] "Accuracy: 0.94811320754717"
# precision: TP / (TP + FP), i.e. divide by everything PREDICTED positive (row 2)
precision <- conf_matrix[2, 2] / sum(conf_matrix[2, ])
print(paste("Precision:", precision))
## [1] "Precision: 0.714285714285714"
# sensitivity (recall): TP / (TP + FN), i.e. divide by everything ACTUALLY
# positive (column 2)
sens <- conf_matrix[2, 2] / sum(conf_matrix[, 2])
print(paste("Sensitivity:", sens))
## [1] "Sensitivity: 0.588235294117647"
# specificity: TN / (TN + FP), i.e. divide by everything ACTUALLY negative
# (column 1)
spec <- conf_matrix[1, 1] / sum(conf_matrix[, 1])
print(paste("Specificity:", spec))
## [1] "Specificity: 0.979487179487179"
# F1 score: harmonic mean of precision and recall (sensitivity), not
# specificity
f1_score <- 2 * (precision * sens) / (precision + sens)
print(paste("F1 Score:", f1_score))
## [1] "F1 Score: 0.645161290322581"
# ROC AUC
# State the class roles and the comparison direction explicitly instead of
# letting pROC infer them: "No" is the control level, "Yes" is the case level,
# and cases are expected to have the higher predicted probability ("<").
# Automatic direction selection can flip the comparison and overstate AUC for
# a weak model; pinning both also removes the "Setting levels/direction"
# messages.
roc_curve <- roc(
  response = test$HOF,
  predictor = predictions,
  levels = c("No", "Yes"),
  direction = "<"
)
roc_auc <- auc(roc_curve)
print(paste("ROC-AUC Score:", roc_auc))
## [1] "ROC-AUC Score: 0.971342383107089"
Graph ROC curve
# Plot ROC curve
# plot.roc() draws specificity on a reversed x axis (1 -> 0) by default, so
# the random-classifier line is sensitivity = 1 - specificity. A manual
# abline(a = 0, b = 1) draws the opposite diagonal on these axes, so the
# reference line is produced by plot.roc() itself and styled red and dashed
# to match the legend.
plot.roc(
  roc_curve,
  main = "ROC Curve",
  col = "blue",
  identity = TRUE,
  identity.col = "red",
  identity.lty = 2
)
# Add legend
legend(
  "bottomright",
  legend = c("ROC Curve", "Random Classifier"),
  col = c("blue", "red"),
  lty = 1:2
)

Compare predicted probabilities to actual Hall of Fame status
# Attach each test player's predicted probability to the test set, then show
# it next to the player's name and actual HOF label.
test$Predictions <- predictions
report_columns <- c("Player", "HOF", "Predictions")
player_predictions <- test[, report_columns]
print(player_predictions)
## Player HOF Predictions
## 2 Michael Jordan Yes 0.637549158
## 4 Tim Duncan Yes 0.845372140
## 5 Dirk Nowitzki Yes 0.917761045
## 8 David Robinson Yes 0.345790805
## 11 Kobe Bryant Yes 0.872709414
## 16 Robert Parish Yes 0.802488043
## 20 Pau Gasol Yes 0.637136837
## 21 Jason Kidd Yes 0.899173441
## 24 Steve Nash Yes 0.509979867
## 26 Vince Carter Yes 0.820566329
## 31 Buck Williams No 0.695072141
## 32 Horace Grant No 0.534543632
## 34 Dikembe Mutombo Yes 0.453124263
## 37 Tony Parker Yes 0.519336918
## 50 Rasheed Wallace No 0.429029496
## 53 Jason Terry No 0.678575604
## 58 A.C. Green No 0.442581924
## 59 Allen Iverson Yes 0.488353744
## 65 Amar'e Stoudemire No 0.112273036
## 67 Terry Cummings No 0.330896760
## 68 Rashard Lewis No 0.316951632
## 69 Hersey Hawkins No 0.261797409
## 71 P.J. Brown No 0.327941301
## 73 Alonzo Mourning Yes 0.107407439
## 84 Joe Dumars Yes 0.381602414
## 87 Michael Finley No 0.506725205
## 88 Tim Hardaway Yes 0.216222405
## 89 Chris Webber Yes 0.222978268
## 97 Marcus Camby No 0.163249335
## 104 Tom Chambers No 0.331845345
## 106 Dan Majerle No 0.203795216
## 107 Stephon Marbury No 0.256897496
## 111 Brad Miller No 0.085175154
## 114 Rolando Blackman No 0.263628705
## 115 Kiki Vandeweghe No 0.084953017
## 118 Byron Scott No 0.202236606
## 126 Bill Cartwright No 0.136217935
## 132 Mark Price No 0.051902805
## 134 Brent Barry No 0.073542698
## 137 Larry Johnson No 0.102547571
## 138 Jerome Kersey No 0.149853535
## 139 John Drew No 0.054306419
## 145 Robert Horry No 0.127600385
## 150 Ron Harper No 0.234046545
## 151 Eddie Johnson No 0.281918281
## 167 Corey Maggette No 0.070152940
## 173 Marcin Gortat No 0.044783278
## 174 Fat Lever No 0.075648316
## 179 Grant Long No 0.159149025
## 181 Gerald Wallace No 0.087910766
## 183 Antonio Davis No 0.109285865
## 189 Mickey Johnson No 0.091909703
## 190 Mike Dunleavy No 0.133079448
## 193 Armen Gilliam No 0.115276693
## 195 Rik Smits No 0.067209468
## 202 Michael Redd No 0.040511391
## 206 Doug Christie No 0.109199072
## 216 Steve Francis No 0.052538491
## 219 John Lucas No 0.100449021
## 220 Scott Wedman No 0.106589286
## 222 Brad Davis No 0.058816250
## 223 Wally Szczerbiak No 0.040152357
## 230 Jerry Stackhouse No 0.204258740
## 238 Samuel Dalembert No 0.052662948
## 240 Josh Smith No 0.171096388
## 246 Sleepy Floyd No 0.114587143
## 248 Johnny Newman No 0.157272183
## 249 Nate McMillan No 0.043067908
## 250 Mike Mitchell No 0.085175154
## 256 Danny Schayes No 0.055669009
## 260 Otis Birdsong No 0.052494110
## 261 Kenyon Martin No 0.067590709
## 262 Sedale Threatt No 0.060153541
## 264 Xavier McDaniel No 0.094869819
## 271 Jalen Rose No 0.145586607
## 275 Michael Adams No 0.042950430
## 276 Kenny Smith No 0.057943505
## 277 Olden Polynice No 0.090123971
## 281 J.R. Smith No 0.112646949
## 294 Kelly Tripucka No 0.046873488
## 295 Caldwell Jones No 0.116244458
## 296 Cliff Levingston No 0.023538163
## 297 Wesley Person No 0.045058800
## 300 Stacey Augmon No 0.052769847
## 301 Brendan Haywood No 0.031711770
## 303 Eric Snow No 0.067568228
## 304 Bruce Bowen No 0.078671111
## 313 Mark West No 0.041273985
## 316 Keith Van Horn No 0.028933705
## 317 Sam Mitchell No 0.058373445
## 320 Craig Ehlo No 0.047774222
## 321 Phil Smith No 0.034484731
## 324 Terry Tyler No 0.053505823
## 327 Cedric Ceballos No 0.015880025
## 330 Tom Gugliotta No 0.072492245
## 333 Aaron McKie No 0.034425385
## 334 Rasho Nesterović No 0.028108330
## 340 Thabo Sefolosha No 0.033872327
## 347 Tony Allen No 0.028461339
## 352 Kurt Rambis No 0.020967650
## 356 Jerome Williams No 0.011253138
## 360 Lonnie Shelton No 0.030900345
## 363 Jim Chones No 0.034242038
## 366 Rony Seikaly No 0.050473345
## 373 Jarrett Jack No 0.079816496
## 376 Harvey Grant No 0.043422184
## 377 Tony Battie No 0.026525632
## 380 Herb Williams No 0.158434171
## 382 Luke Ridnour No 0.052458630
## 384 Kermit Washington No 0.011372853
## 386 Greg Anthony No 0.019385003
## 391 Morris Peterson No 0.035692569
## 393 Micheal Ray Richardson No 0.031217080
## 394 Tim Thomas No 0.049971311
## 400 Corliss Williamson No 0.032091912
## 401 Mike Woodson No 0.039939770
## 403 Nazr Mohammed No 0.019524525
## 407 John Salmons No 0.073895952
## 412 Dee Brown No 0.023055155
## 417 Charlie Ward No 0.014210985
## 425 Benoit Benjamin No 0.055071862
## 430 Chris Gatling No 0.013433074
## 431 Gerald Henderson No 0.032690134
## 434 Beno Udrih No 0.029109645
## 443 Bob Gross No 0.010901418
## 445 Lorenzen Wright No 0.030927066
## 446 Marco Belinelli No 0.036827027
## 447 Scott Skiles No 0.022839180
## 456 Allen Leavell No 0.020781702
## 457 Frank Brickowski No 0.020890887
## 458 Voshon Lenard No 0.017757110
## 461 Bonzi Wells No 0.017097386
## 470 Chuck Hayes No 0.009532443
## 472 Aaron Williams No 0.009849129
## 474 Kelvin Cato No 0.007712389
## 480 Tyreke Evans No 0.029398399
## 482 Bob Sura No 0.018545429
## 485 Ricky Davis No 0.055220581
## 490 Willie Anderson No 0.019809991
## 491 Nate Robinson No 0.013714916
## 494 Joe Meriweather No 0.016147052
## 496 Austin Croshere No 0.008984439
## 500 Darrell Walker No 0.031281884
## 509 Andrew Toney No 0.010965076
## 513 Pooh Richardson No 0.035895739
## 515 Will Perdue No 0.010495532
## 518 Kevin Grevey No 0.017439459
## 520 Billy Paultz No 0.013129182
## 527 Reggie Williams No 0.019945534
## 529 Luc Mbah a Moute No 0.020032897
## 531 Marreese Speights No 0.007743852
## 534 Josh Childress No 0.007463889
## 536 Clemon Johnson No 0.012409744
## 538 Tiago Splitter No 0.003926973
## 541 Chris Duhon No 0.018316298
## 543 Dante Cunningham No 0.016340895
## 545 Reggie King No 0.008772532
## 546 Rex Chapman No 0.036789080
## 548 Kevin Duckworth No 0.025677717
## 549 Martell Webster No 0.013802056
## 554 Chris Kaman No 0.037426460
## 562 Ron Anderson No 0.017034546
## 563 Tracy Murray No 0.010029937
## 568 Shandon Anderson No 0.019713235
## 570 Jeremy Lin No 0.010235695
## 572 Randy Foye No 0.035162232
## 575 Bill Robinzine No 0.008908520
## 576 Gary Trent No 0.006751914
## 581 Desmond Mason No 0.037420034
## 582 Othella Harrington No 0.013826359
## 583 Jeff McInnis No 0.019647843
## 585 Joel Przybilla No 0.009395319
## 589 Lionel Hollins No 0.030491657
## 593 Spencer Hawes No 0.018364473
## 596 Carlos Delfino No 0.009144670
## 599 Corey Brewer No 0.032214040
## 606 Popeye Jones No 0.010909114
## 608 James Silas No 0.005426029
## 614 Johnny Dawkins No 0.016295080
## 616 Eddy Curry No 0.011977898
## 617 Alvin Williams No 0.010968946
## 618 Nenad Krstić No 0.007617411
## 619 Tim McCormick No 0.007408589
## 621 Stromile Swift No 0.007971917
## 622 Harvey Catchings No 0.012132994
## 623 Chris Whitney No 0.006978112
## 628 Terry Catledge No 0.012925055
## 631 Greg Buckner No 0.008101326
## 632 Ömer Aşık No 0.006017147
## 634 Nikola Mirotić No 0.004612638
## 637 Tony Campbell No 0.017238856
## 638 John Henson No 0.005541799
## 639 Keith Bogans No 0.015372433
## 642 Tyler Hansbrough No 0.004231955
## 643 Evan Turner No 0.033155862
## 647 Jason Collins No 0.016627204
## 651 Eddie House No 0.010579229
## 652 Jayson Williams No 0.006679328
## 655 Lamond Murray No 0.030618476
## 656 Lewis Lloyd No 0.006223295
## 657 Mickaël Piétrus No 0.009762525
## 661 Tony Delk No 0.009343990
## 662 Howard Eisley No 0.019945534
## 664 Andrew DeClercq No 0.006729221
## 667 Earl Watson No 0.036486858
## 677 Nikola Peković No 0.003879809
## 681 Jerryd Bayless No 0.010731550
## 682 Nick Young No 0.021411620
## 683 Glen Davis No 0.008058435
## 684 Keyon Dooling No 0.014346567
## 685 Derek Strong No 0.005344811
## 701 Timofey Mozgov No 0.005007799
#write_xlsx(player_predictions, "player_predictions.xlsx")