library(caret)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Loading required package: lattice
library(klaR)
## Loading required package: MASS
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# -------------------------------
# Load data (KNIT-SAFE PATH)
# -------------------------------
# The file is read without a header row, so the 23 column names are attached
# immediately after reading. Character columns are converted to factors.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
DATA <- setNames(
  read.csv(
    file = "/Users/maxineharlemon/AIOpt/mushroom_dataset/agaricus-lepiota.data",
    header = FALSE,
    stringsAsFactors = TRUE
  ),
  c(
    "class",
    "cap_shape", "cap_surface", "cap_color",
    "bruises", "odor",
    "gill_attachment", "gill_spacing", "gill_size", "gill_color",
    "stalk_shape", "stalk_root",
    "stalk_surface_above_ring", "stalk_surface_below_ring",
    "stalk_color_above_ring", "stalk_color_below_ring",
    "veil_type", "veil_color",
    "ring_number", "ring_type",
    "spore_print_color", "population", "habitat"
  )
)
# Pin the outcome's level order explicitly: "e" first, "p" second
# (presumably edible / poisonous -- confirm against the dataset description).
DATA[["class"]] <- factor(DATA[["class"]], levels = c("e", "p"))
# -------------------------------
# Train / test split
# -------------------------------
# Seed the RNG so the partition is reproducible, then let caret draw 80% of
# the rows (partitioned on `class`) for training. `idx` holds the selected
# row positions; every remaining row goes to the test set.
set.seed(123)
idx <- createDataPartition(y = DATA$class, p = 0.8, list = FALSE)
test_DATA <- DATA[-idx, ]
train_DATA <- DATA[idx, ]
# -------------------------------
# REMOVE ZERO- AND NEAR-ZERO-VARIANCE PREDICTORS
# -------------------------------
# nearZeroVar() (default cut-offs) flags columns with a single level as well
# as columns dominated by one value, so this can drop more than strictly
# constant predictors. The filter is learned on the training rows only and
# the same columns are then removed from the test rows.
nzv <- nearZeroVar(train_DATA)
# Never drop the outcome, even if it were flagged.
nzv <- setdiff(nzv, match("class", names(train_DATA)))
# Select the columns to KEEP rather than negating `nzv`: with an empty `nzv`,
# `train_DATA[, -nzv]` would silently drop every column.
keep_cols <- setdiff(seq_along(train_DATA), nzv)
train_CLEAN <- train_DATA[, keep_cols, drop = FALSE]
test_CLEAN <- test_DATA[, keep_cols, drop = FALSE]
# -------------------------------
# 10-fold CV
# -------------------------------
# Resampling scheme handed to train() by the model fits that reference `ctrl`:
# plain k-fold cross-validation with k = 10.
ctrl <- trainControl(number = 10, method = "cv")
# -------------------------------
# Naive Bayes (caret)
# -------------------------------
# Fit through train()'s x/y interface instead of a formula. The formula
# interface expands every factor into 0/1 dummy columns, which klaR's
# NaiveBayes then treats as numeric predictors. On this data that made every
# `usekernel = FALSE` fit fail in all 10 folds ("Zero variances for at least
# one class in variables: cap_shapec, ...") and left only a kernel-density
# model fitted to indicator columns. Passing the data frame directly keeps the
# predictors as factors, so class-conditional frequency tables are used.
#
# NOTE: the accuracy/kappa figures recorded in the results table at the end of
# this script came from the old formula-based fit; re-knit to refresh them.
nb_predictors <- setdiff(names(train_CLEAN), "class")
nb_cv <- train(
  x = train_CLEAN[, nb_predictors, drop = FALSE],
  y = train_CLEAN$class,
  method = "nb",
  trControl = ctrl,
  # With factor-only predictors the kernel option has nothing to act on, so
  # tune the Laplace correction (fL) instead of usekernel.
  tuneGrid = expand.grid(
    fL = c(0, 1),
    usekernel = FALSE,
    adjust = 1
  )
)
# Predict on exactly the columns the model was trained on (no outcome column).
nb_pred <- predict(
  nb_cv,
  newdata = test_CLEAN[, nb_predictors, drop = FALSE]
)
nb_cm <- confusionMatrix(nb_pred, test_CLEAN$class)
# -------------------------------
# Random Forest (caret)
# -------------------------------
# Fit a random forest on every remaining predictor under the shared `ctrl`
# resampling scheme; `ntree = 500` is forwarded to the underlying fit.
rf_cv <- train(
  class ~ .,
  data = train_CLEAN,
  ntree = 500,
  trControl = ctrl,
  method = "rf"
)
# Score the held-out rows and tabulate predictions against the true classes.
rf_pred <- predict(rf_cv, newdata = test_CLEAN)
rf_cm <- confusionMatrix(data = rf_pred, reference = test_CLEAN$class)
# -------------------------------
# Compare results
# -------------------------------
# One row per model, with test-set accuracy and kappa taken from the
# `overall` element of each confusion matrix. Built inside local() so the
# helpers do not leak into the workspace.
results <- local({
  fits <- list(nb_cm, rf_cm)
  pick <- function(stat) {
    vapply(fits, function(cm) cm$overall[[stat]], numeric(1))
  }
  data.frame(
    Model = c("Naive Bayes", "Random Forest"),
    Accuracy = pick("Accuracy"),
    Kappa = pick("Kappa")
  )
})
results
## Model Accuracy Kappa
## 1 Naive Bayes 0.9137931 0.8263412
## 2 Random Forest 1.0000000 1.0000000