# Load caret for data partitioning, model training, and evaluation
library(caret)
# Label each row by whether its PPG is strictly above the median PPG.
# Rows exactly at the median are labelled "Below"; a missing PPG yields NA.
nba_data <- nba_data %>%
  mutate(
    Above_Current_PPG = ifelse(
      PPG > median(PPG, na.rm = TRUE), "Above", "Below"
    )
  )

# Modelling frame: the outcome plus the single predictor (Age).
# The outcome is encoded as a factor with explicit levels here, once, so the
# training and test sets share the same class definition instead of relying
# on implicit character-to-factor coercion later on.
nba_data_ml <- nba_data %>%
  select(Above_Current_PPG, Age) %>%
  mutate(
    Above_Current_PPG = factor(
      Above_Current_PPG,
      levels = c("Above", "Below")
    )
  )

# Drop rows with a missing outcome or a missing Age.
data_ml <- na.omit(nba_data_ml)

# Seed immediately before the first random step so the split is reproducible.
set.seed(123)

# 80/20 train/test split on the outcome column.
trainIndex <- createDataPartition(
  data_ml$Above_Current_PPG,
  p = 0.8,
  list = FALSE
)
trainData <- data_ml[trainIndex, ]
testData <- data_ml[-trainIndex, ]

# Logistic regression of the outcome class on Age, fitted through caret.
model_logistic <- train(
  Above_Current_PPG ~ Age,
  data = trainData,
  method = "glm",
  family = "binomial"
)

# Predict classes for the held-out rows and align their levels with the
# reference so the confusion matrix compares like with like.
predictions <- predict(model_logistic, newdata = testData)
predictions <- factor(
  predictions,
  levels = levels(testData$Above_Current_PPG)
)

# Evaluate on the test set; "Above" is stated as the positive class rather
# than left to the default (first factor level).
confusionMatrix(
  predictions,
  testData$Above_Current_PPG,
  positive = "Above"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Above Below
## Above 26 22
## Below 27 32
##
## Accuracy : 0.5421
## 95% CI : (0.443, 0.6388)
## No Information Rate : 0.5047
## P-Value [Acc > NIR] : 0.2494
##
## Kappa : 0.0832
##
## Mcnemar's Test P-Value : 0.5677
##
## Sensitivity : 0.4906
## Specificity : 0.5926
## Pos Pred Value : 0.5417
## Neg Pred Value : 0.5424
## Prevalence : 0.4953
## Detection Rate : 0.2430
## Detection Prevalence : 0.4486
## Balanced Accuracy : 0.5416
##
## 'Positive' Class : Above
##