Load packages

library(dplyr)      
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)    
library(glmnet)     
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(yardstick)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
# Read the raw CSV, drop rows whose HOF value is missing, and store HOF as a
# factor so it can be used as the response of a binomial model.
Basketball_raw <- read.csv("Basketball_raw.csv")
basketball_filtered <- Basketball_raw %>%
  filter(!is.na(HOF)) %>%
  mutate(HOF = as.factor(HOF))

Split dataset into training and test sets

# Fix the RNG seed so the split is reproducible.
set.seed(123)

# Draw a group label (1 or 2) for every row with probabilities 0.7 / 0.3;
# rows labelled 1 go to the training set and rows labelled 2 to the test set.
sample <- sample(
  x = 2,
  size = nrow(basketball_filtered),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train <- basketball_filtered[sample == 1, ]
test <- basketball_filtered[sample == 2, ]

Logistic Regression Model

# Fit a binomial GLM with HOF as the response and MP as the only predictor,
# using the training rows. maxit raises the cap on fitting iterations.
model <- glm(
  formula = HOF ~ MP,
  family = binomial,
  data = train,
  maxit = 1000
)

# Show coefficient estimates and deviance for the fitted model.
summary(model)
## 
## Call:
## glm(formula = HOF ~ MP, family = binomial, data = train, maxit = 1000)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -6.751e+00  6.686e-01 -10.097  < 2e-16 ***
## MP           1.784e-04  2.201e-05   8.104 5.33e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 331.56  on 489  degrees of freedom
## Residual deviance: 229.53  on 488  degrees of freedom
## AIC: 233.53
## 
## Number of Fisher Scoring iterations: 6

Make predictions

# Predicted probabilities for the test rows (type = "response" puts them on
# the probability scale rather than the link scale).
predictions <- predict(model, newdata = test, type = "response")

# Keep only test rows that received a prediction, so that `test` and
# `predictions` stay aligned row for row.
has_prediction <- !is.na(predictions)
test <- test[has_prediction, ]
predictions <- predictions[has_prediction]

# Classify as 1 when the predicted probability exceeds 0.5, otherwise 0.
predicted_classes <- ifelse(predictions > 0.5, 1, 0)

Confusion matrix

# Cross-tabulate predicted class (rows) against the observed HOF label
# (columns).
conf_matrix <- table(predicted_classes, test[["HOF"]])

print("Confusion Matrix:")
## [1] "Confusion Matrix:"
print(conf_matrix)
##                  
## predicted_classes  No Yes
##                 0 191   7
##                 1   4  10

Compute performance metrics

# Performance metrics derived from the 2x2 confusion matrix.
# conf_matrix is table(predicted, actual): rows are the predicted classes
# (0, 1) and columns are the observed HOF labels (No, Yes). Row sums are
# therefore predicted-class totals and column sums are actual-class totals.
stopifnot(identical(dim(conf_matrix), c(2L, 2L)))
true_neg <- conf_matrix[1, 1]   # predicted 0, actual No
false_neg <- conf_matrix[1, 2]  # predicted 0, actual Yes
false_pos <- conf_matrix[2, 1]  # predicted 1, actual No
true_pos <- conf_matrix[2, 2]   # predicted 1, actual Yes

# accuracy: share of all test rows that were classified correctly
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
print(paste("Accuracy:", accuracy))
## [1] "Accuracy: 0.94811320754717"
# precision: TP / (TP + FP), i.e. relative to everything predicted positive
precision <- true_pos / (true_pos + false_pos)
print(paste("Precision:", precision))
## [1] "Precision: 0.714285714285714"
# sensitivity (recall): TP / (TP + FN), i.e. relative to all actual positives
sens <- true_pos / (true_pos + false_neg)
print(paste("Sensitivity:", sens))
## [1] "Sensitivity: 0.588235294117647"
# specificity: TN / (TN + FP), i.e. relative to all actual negatives
spec <- true_neg / (true_neg + false_pos)
print(paste("Specificity:", spec))
## [1] "Specificity: 0.979487179487179"
# F1 score: harmonic mean of precision and sensitivity (recall)
f1_score <- 2 * (precision * sens) / (precision + sens)
print(paste("F1 Score:", f1_score))
## [1] "F1 Score: 0.645161290322581"
# ROC AUC
# Build the ROC object from the observed HOF labels and the predicted
# probabilities, then take the area under that curve.
roc_curve <- roc(response = test$HOF, predictor = predictions)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
roc_auc <- auc(roc_curve)
print(paste("ROC-AUC Score:", roc_auc))
## [1] "ROC-AUC Score: 0.971342383107089"

Graph ROC curve

# Plot ROC curve
# identity = FALSE suppresses pROC's built-in grey chance line so that the
# explicitly styled reference line below is the only one drawn.
plot.roc(roc_curve, main = "ROC Curve", col = "blue", identity = FALSE)

# Add diagonal reference line for random classifier.
# plot.roc puts specificity on the x-axis running from 1 down to 0, so the
# chance line is sensitivity = 1 - specificity (intercept 1, slope -1);
# abline(a = 0, b = 1) would draw the opposite diagonal.
abline(a = 1, b = -1, lty = 2, col = "red")

# Add legend
legend("bottomright", legend = c("ROC Curve", "Random Classifier"),
       col = c("blue", "red"), lty = 1:2)

Compare predictions to actual outcomes

# Attach the predicted probabilities to the test rows, then list each player
# with the observed HOF label next to the predicted probability.
test[["Predictions"]] <- predictions
player_predictions <- test[c("Player", "HOF", "Predictions")]
print(player_predictions)
##                     Player HOF Predictions
## 2           Michael Jordan Yes 0.637549158
## 4               Tim Duncan Yes 0.845372140
## 5            Dirk Nowitzki Yes 0.917761045
## 8           David Robinson Yes 0.345790805
## 11             Kobe Bryant Yes 0.872709414
## 16           Robert Parish Yes 0.802488043
## 20               Pau Gasol Yes 0.637136837
## 21              Jason Kidd Yes 0.899173441
## 24              Steve Nash Yes 0.509979867
## 26            Vince Carter Yes 0.820566329
## 31           Buck Williams  No 0.695072141
## 32            Horace Grant  No 0.534543632
## 34         Dikembe Mutombo Yes 0.453124263
## 37             Tony Parker Yes 0.519336918
## 50         Rasheed Wallace  No 0.429029496
## 53             Jason Terry  No 0.678575604
## 58              A.C. Green  No 0.442581924
## 59           Allen Iverson Yes 0.488353744
## 65       Amar'e Stoudemire  No 0.112273036
## 67          Terry Cummings  No 0.330896760
## 68           Rashard Lewis  No 0.316951632
## 69          Hersey Hawkins  No 0.261797409
## 71              P.J. Brown  No 0.327941301
## 73         Alonzo Mourning Yes 0.107407439
## 84              Joe Dumars Yes 0.381602414
## 87          Michael Finley  No 0.506725205
## 88            Tim Hardaway Yes 0.216222405
## 89            Chris Webber Yes 0.222978268
## 97            Marcus Camby  No 0.163249335
## 104           Tom Chambers  No 0.331845345
## 106            Dan Majerle  No 0.203795216
## 107        Stephon Marbury  No 0.256897496
## 111            Brad Miller  No 0.085175154
## 114       Rolando Blackman  No 0.263628705
## 115        Kiki Vandeweghe  No 0.084953017
## 118            Byron Scott  No 0.202236606
## 126        Bill Cartwright  No 0.136217935
## 132             Mark Price  No 0.051902805
## 134            Brent Barry  No 0.073542698
## 137          Larry Johnson  No 0.102547571
## 138          Jerome Kersey  No 0.149853535
## 139              John Drew  No 0.054306419
## 145           Robert Horry  No 0.127600385
## 150             Ron Harper  No 0.234046545
## 151          Eddie Johnson  No 0.281918281
## 167         Corey Maggette  No 0.070152940
## 173          Marcin Gortat  No 0.044783278
## 174              Fat Lever  No 0.075648316
## 179             Grant Long  No 0.159149025
## 181         Gerald Wallace  No 0.087910766
## 183          Antonio Davis  No 0.109285865
## 189         Mickey Johnson  No 0.091909703
## 190          Mike Dunleavy  No 0.133079448
## 193          Armen Gilliam  No 0.115276693
## 195              Rik Smits  No 0.067209468
## 202           Michael Redd  No 0.040511391
## 206          Doug Christie  No 0.109199072
## 216          Steve Francis  No 0.052538491
## 219             John Lucas  No 0.100449021
## 220           Scott Wedman  No 0.106589286
## 222             Brad Davis  No 0.058816250
## 223       Wally Szczerbiak  No 0.040152357
## 230       Jerry Stackhouse  No 0.204258740
## 238       Samuel Dalembert  No 0.052662948
## 240             Josh Smith  No 0.171096388
## 246           Sleepy Floyd  No 0.114587143
## 248          Johnny Newman  No 0.157272183
## 249          Nate McMillan  No 0.043067908
## 250          Mike Mitchell  No 0.085175154
## 256          Danny Schayes  No 0.055669009
## 260          Otis Birdsong  No 0.052494110
## 261          Kenyon Martin  No 0.067590709
## 262         Sedale Threatt  No 0.060153541
## 264        Xavier McDaniel  No 0.094869819
## 271             Jalen Rose  No 0.145586607
## 275          Michael Adams  No 0.042950430
## 276            Kenny Smith  No 0.057943505
## 277         Olden Polynice  No 0.090123971
## 281             J.R. Smith  No 0.112646949
## 294         Kelly Tripucka  No 0.046873488
## 295         Caldwell Jones  No 0.116244458
## 296       Cliff Levingston  No 0.023538163
## 297          Wesley Person  No 0.045058800
## 300          Stacey Augmon  No 0.052769847
## 301        Brendan Haywood  No 0.031711770
## 303              Eric Snow  No 0.067568228
## 304            Bruce Bowen  No 0.078671111
## 313              Mark West  No 0.041273985
## 316         Keith Van Horn  No 0.028933705
## 317           Sam Mitchell  No 0.058373445
## 320             Craig Ehlo  No 0.047774222
## 321             Phil Smith  No 0.034484731
## 324            Terry Tyler  No 0.053505823
## 327        Cedric Ceballos  No 0.015880025
## 330          Tom Gugliotta  No 0.072492245
## 333            Aaron McKie  No 0.034425385
## 334       Rasho Nesterović  No 0.028108330
## 340        Thabo Sefolosha  No 0.033872327
## 347             Tony Allen  No 0.028461339
## 352            Kurt Rambis  No 0.020967650
## 356        Jerome Williams  No 0.011253138
## 360         Lonnie Shelton  No 0.030900345
## 363             Jim Chones  No 0.034242038
## 366           Rony Seikaly  No 0.050473345
## 373           Jarrett Jack  No 0.079816496
## 376           Harvey Grant  No 0.043422184
## 377            Tony Battie  No 0.026525632
## 380          Herb Williams  No 0.158434171
## 382           Luke Ridnour  No 0.052458630
## 384      Kermit Washington  No 0.011372853
## 386           Greg Anthony  No 0.019385003
## 391        Morris Peterson  No 0.035692569
## 393 Micheal Ray Richardson  No 0.031217080
## 394             Tim Thomas  No 0.049971311
## 400     Corliss Williamson  No 0.032091912
## 401           Mike Woodson  No 0.039939770
## 403          Nazr Mohammed  No 0.019524525
## 407           John Salmons  No 0.073895952
## 412              Dee Brown  No 0.023055155
## 417           Charlie Ward  No 0.014210985
## 425        Benoit Benjamin  No 0.055071862
## 430          Chris Gatling  No 0.013433074
## 431       Gerald Henderson  No 0.032690134
## 434             Beno Udrih  No 0.029109645
## 443              Bob Gross  No 0.010901418
## 445        Lorenzen Wright  No 0.030927066
## 446        Marco Belinelli  No 0.036827027
## 447           Scott Skiles  No 0.022839180
## 456          Allen Leavell  No 0.020781702
## 457       Frank Brickowski  No 0.020890887
## 458          Voshon Lenard  No 0.017757110
## 461            Bonzi Wells  No 0.017097386
## 470            Chuck Hayes  No 0.009532443
## 472         Aaron Williams  No 0.009849129
## 474            Kelvin Cato  No 0.007712389
## 480           Tyreke Evans  No 0.029398399
## 482               Bob Sura  No 0.018545429
## 485            Ricky Davis  No 0.055220581
## 490        Willie Anderson  No 0.019809991
## 491          Nate Robinson  No 0.013714916
## 494        Joe Meriweather  No 0.016147052
## 496        Austin Croshere  No 0.008984439
## 500         Darrell Walker  No 0.031281884
## 509           Andrew Toney  No 0.010965076
## 513        Pooh Richardson  No 0.035895739
## 515            Will Perdue  No 0.010495532
## 518           Kevin Grevey  No 0.017439459
## 520           Billy Paultz  No 0.013129182
## 527        Reggie Williams  No 0.019945534
## 529       Luc Mbah a Moute  No 0.020032897
## 531      Marreese Speights  No 0.007743852
## 534         Josh Childress  No 0.007463889
## 536         Clemon Johnson  No 0.012409744
## 538         Tiago Splitter  No 0.003926973
## 541            Chris Duhon  No 0.018316298
## 543       Dante Cunningham  No 0.016340895
## 545            Reggie King  No 0.008772532
## 546            Rex Chapman  No 0.036789080
## 548        Kevin Duckworth  No 0.025677717
## 549        Martell Webster  No 0.013802056
## 554            Chris Kaman  No 0.037426460
## 562           Ron Anderson  No 0.017034546
## 563           Tracy Murray  No 0.010029937
## 568       Shandon Anderson  No 0.019713235
## 570             Jeremy Lin  No 0.010235695
## 572             Randy Foye  No 0.035162232
## 575         Bill Robinzine  No 0.008908520
## 576             Gary Trent  No 0.006751914
## 581          Desmond Mason  No 0.037420034
## 582     Othella Harrington  No 0.013826359
## 583           Jeff McInnis  No 0.019647843
## 585         Joel Przybilla  No 0.009395319
## 589         Lionel Hollins  No 0.030491657
## 593          Spencer Hawes  No 0.018364473
## 596         Carlos Delfino  No 0.009144670
## 599           Corey Brewer  No 0.032214040
## 606           Popeye Jones  No 0.010909114
## 608            James Silas  No 0.005426029
## 614         Johnny Dawkins  No 0.016295080
## 616             Eddy Curry  No 0.011977898
## 617         Alvin Williams  No 0.010968946
## 618           Nenad Krstić  No 0.007617411
## 619          Tim McCormick  No 0.007408589
## 621         Stromile Swift  No 0.007971917
## 622       Harvey Catchings  No 0.012132994
## 623          Chris Whitney  No 0.006978112
## 628         Terry Catledge  No 0.012925055
## 631           Greg Buckner  No 0.008101326
## 632              Ömer Aşık  No 0.006017147
## 634         Nikola Mirotić  No 0.004612638
## 637          Tony Campbell  No 0.017238856
## 638            John Henson  No 0.005541799
## 639           Keith Bogans  No 0.015372433
## 642       Tyler Hansbrough  No 0.004231955
## 643            Evan Turner  No 0.033155862
## 647          Jason Collins  No 0.016627204
## 651            Eddie House  No 0.010579229
## 652        Jayson Williams  No 0.006679328
## 655          Lamond Murray  No 0.030618476
## 656            Lewis Lloyd  No 0.006223295
## 657        Mickaël Piétrus  No 0.009762525
## 661              Tony Delk  No 0.009343990
## 662          Howard Eisley  No 0.019945534
## 664        Andrew DeClercq  No 0.006729221
## 667            Earl Watson  No 0.036486858
## 677         Nikola Peković  No 0.003879809
## 681         Jerryd Bayless  No 0.010731550
## 682             Nick Young  No 0.021411620
## 683             Glen Davis  No 0.008058435
## 684          Keyon Dooling  No 0.014346567
## 685           Derek Strong  No 0.005344811
## 701         Timofey Mozgov  No 0.005007799
#write_xlsx(player_predictions, "player_predictions.xlsx")