library(caret)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Loading required package: lattice
library(klaR)
## Loading required package: MASS
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
# -------------------------------
# Load data
# -------------------------------
# The dataset location may be overridden with the MUSHROOM_DATA environment
# variable so the script can be knit on other machines; when the variable is
# unset or empty, the original absolute path is used.
default_data_path <-
  "/Users/maxineharlemon/AIOpt/mushroom_dataset/agaricus-lepiota.data"
data_path <- Sys.getenv("MUSHROOM_DATA", unset = default_data_path)
if (!nzchar(data_path)) {
  data_path <- default_data_path
}
if (!file.exists(data_path)) {
  stop("Mushroom data file not found: ", data_path, call. = FALSE)
}

# The file has no header row; every column is read as a factor.
DATA <- read.csv(
  data_path,
  header = FALSE,
  stringsAsFactors = TRUE
)

column_names <- c(
  "class", "cap_shape", "cap_surface", "cap_color", "bruises",
  "odor", "gill_attachment", "gill_spacing", "gill_size",
  "gill_color", "stalk_shape", "stalk_root",
  "stalk_surface_above_ring", "stalk_surface_below_ring",
  "stalk_color_above_ring", "stalk_color_below_ring",
  "veil_type", "veil_color", "ring_number", "ring_type",
  "spore_print_color", "population", "habitat"
)

# Fail early if the file does not have the expected layout instead of
# silently mislabelling columns.
if (ncol(DATA) != length(column_names)) {
  stop(
    "Expected ", length(column_names), " columns but found ", ncol(DATA),
    " in ", data_path,
    call. = FALSE
  )
}
colnames(DATA) <- column_names

# Fix the outcome level order to "e", "p". Any other label would be turned
# into NA by factor(), so reject it explicitly.
DATA$class <- factor(DATA$class, levels = c("e", "p"))
if (anyNA(DATA$class)) {
  stop("Column 'class' contains values other than 'e' and 'p'.", call. = FALSE)
}

# -------------------------------
# Train / test split
# -------------------------------
# Partition on the outcome: 80% of the rows go to training, the rest to
# testing. The seed makes the partition reproducible.
set.seed(123)
idx <- createDataPartition(y = DATA$class, p = 0.80, list = FALSE)

train_DATA <- DATA[idx, , drop = FALSE]
test_DATA <- DATA[-idx, , drop = FALSE]

# -------------------------------
# REMOVE ZERO / NEAR-ZERO VARIANCE PREDICTORS
# -------------------------------
# nearZeroVar() (caret defaults) flags single-level columns as well as
# columns dominated by one value. The columns are identified on the training
# set only and the same columns are then removed from the test set.
nzv <- nearZeroVar(train_DATA)

# Never drop the outcome column, even if it were flagged.
nzv <- setdiff(nzv, match("class", names(train_DATA)))

# Select the columns to keep by positive index: negative indexing with an
# empty `nzv` (nothing flagged) would drop every column.
keep_cols <- setdiff(seq_along(train_DATA), nzv)
train_CLEAN <- train_DATA[, keep_cols, drop = FALSE]
test_CLEAN  <- test_DATA[, keep_cols, drop = FALSE]

# -------------------------------
# 10-fold CV
# -------------------------------
# Resampling scheme shared by the models trained below.
ctrl <- trainControl(method = "cv", number = 10)

# -------------------------------
# Naive Bayes (caret)
# -------------------------------
# Use the x/y interface rather than a formula. The formula interface expands
# every factor into 0/1 dummy columns, which klaR::NaiveBayes then treats as
# numeric; many of those dummies are constant within a class, so every
# `usekernel = FALSE` resample failed with "Zero variances for at least one
# class" and the resampled performance contained missing values. Passing the
# factors directly lets NaiveBayes use per-class frequency tables.
nb_predictors <- setdiff(names(train_CLEAN), "class")

# All predictors are categorical, so the kernel options are irrelevant; tune
# only the Laplace correction (fL): none (0) versus add-one (1).
nb_grid <- expand.grid(
  usekernel = FALSE,
  fL = c(0, 1),
  adjust = 1
)

nb_cv <- train(
  x = train_CLEAN[, nb_predictors, drop = FALSE],
  y = train_CLEAN$class,
  method = "nb",
  trControl = ctrl,
  tuneGrid = nb_grid
)

# Predict on the held-out set using the same predictor columns, then compare
# the predictions with the true classes.
nb_pred <- predict(nb_cv, test_CLEAN[, nb_predictors, drop = FALSE])
nb_cm <- confusionMatrix(nb_pred, test_CLEAN$class)

# -------------------------------
# Random Forest (caret)
# -------------------------------
# Fit a random forest on the cleaned training data with the shared
# cross-validation control; `ntree` is passed through to the underlying fit.
rf_cv <- train(
  class ~ .,
  data = train_CLEAN,
  method = "rf",
  ntree = 500,
  trControl = ctrl
)

# Evaluate on the held-out test set.
rf_pred <- predict(rf_cv, newdata = test_CLEAN)
rf_cm <- confusionMatrix(data = rf_pred, reference = test_CLEAN$class)

# -------------------------------
# Compare results
# -------------------------------
# Collect test-set Accuracy and Kappa from each confusion matrix into one
# table, one row per model.
model_cms <- list(nb_cm, rf_cm)

results <- data.frame(
  Model = c("Naive Bayes", "Random Forest"),
  Accuracy = vapply(
    model_cms,
    function(cm) cm$overall[["Accuracy"]],
    numeric(1)
  ),
  Kappa = vapply(
    model_cms,
    function(cm) cm$overall[["Kappa"]],
    numeric(1)
  )
)

results
##           Model  Accuracy     Kappa
## 1   Naive Bayes 0.9137931 0.8263412
## 2 Random Forest 1.0000000 1.0000000