This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# For manipulating the datasets
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(readxl)
# For plotting correlation matrix
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.0.3
## Loading required package: ggplot2
# Machine Learning library
library(caret)
## Warning: package 'caret' was built under R version 4.0.3
## Loading required package: lattice
# For Multi-core processing support
library(parallel)
# Number of CPU cores detected on this machine (not referenced again in the
# visible part of this notebook).
numCores <- parallel::detectCores()
# Numerical dataset, read from an Excel workbook.
dataset_num <- read_excel(path = "rice.xlsx")
# Categorical dataset, read from a CSV file.
dataset_cat <- read.csv(file = "mushrooms.csv")
# Mixed (numerical + categorical) dataset, read from an Excel workbook.
dataset_mix <- read_excel(path = "bank.xlsx")
# Tabulate how many rows fall under each VEIL.TYPE level.
dataset_cat %>%
  group_by(VEIL.TYPE) %>%
  summarise(total = n())
## `summarise()` ungrouping output (override with `.groups` argument)
# VEIL.TYPE only takes a single value, so it is removed from the dataset.
dataset_cat <- dataset_cat %>%
  select(-VEIL.TYPE)
# Tabulate how many rows fall under each STALK.ROOT level.
dataset_cat %>%
  group_by(STALK.ROOT) %>%
  summarise(total = n())
## `summarise()` ungrouping output (override with `.groups` argument)
# STALK.ROOT contains missing values, so it is removed from the dataset.
dataset_cat <- dataset_cat %>%
  select(-STALK.ROOT)
# Choose which of the loaded datasets is used by the split and model code
# below; only one of the three assignments should be active at a time.
#dataset <- dataset_num
dataset <- dataset_cat
#dataset <- dataset_mix
# Display the selected dataset.
dataset
# Fix the RNG seed so the random train/test split below is reproducible
# from one run of the notebook to the next.
set.seed(42)
# Keep 80% of the rows for training and hold out the rest for testing;
# createDataPartition() samples within each level of CLASS.
trainIndex <- createDataPartition(dataset$CLASS, p = 0.80, list = FALSE)
data_train <- dataset[trainIndex, ]
data_test <- dataset[-trainIndex, ]
# Resampling setup passed to caret::train(): plain 5-fold cross-validation.
# `repeats` is intentionally not set: it has no meaning for method = "cv"
# (only for repeated CV) and supplying it makes trainControl() emit a warning.
fitControl <- trainControl(
  method = "cv",
  number = 5,
  # summaryFunction = twoClassSummary,
  verboseIter = TRUE,   # log every fold / tuning value while fitting
  classProbs = FALSE,   # do not compute class probabilities during resampling
  allowParallel = TRUE  # use a parallel backend if one is registered
)
# Model specification: predict CLASS from every other column.
train_formula <- CLASS ~ .

# Fit a random forest through caret using the resampling scheme in fitControl.
# Disabled options kept for reference:
#   tuneLength = 9
#   tuneGrid = svmGrid
#   preProcess = c("scale", "center")
#   metric = "ROC"
#   weights = model_weights
rfFitupsam <- train(
  train_formula,
  data = data_train,
  method = "rf",
  trControl = fitControl
)
## + Fold1: mtry= 2
## - Fold1: mtry= 2
## + Fold1: mtry=46
## - Fold1: mtry=46
## + Fold1: mtry=91
## - Fold1: mtry=91
## + Fold2: mtry= 2
## - Fold2: mtry= 2
## + Fold2: mtry=46
## - Fold2: mtry=46
## + Fold2: mtry=91
## - Fold2: mtry=91
## + Fold3: mtry= 2
## - Fold3: mtry= 2
## + Fold3: mtry=46
## - Fold3: mtry=46
## + Fold3: mtry=91
## - Fold3: mtry=91
## + Fold4: mtry= 2
## - Fold4: mtry= 2
## + Fold4: mtry=46
## - Fold4: mtry=46
## + Fold4: mtry=91
## - Fold4: mtry=91
## + Fold5: mtry= 2
## - Fold5: mtry= 2
## + Fold5: mtry=46
## - Fold5: mtry=46
## + Fold5: mtry=91
## - Fold5: mtry=91
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 46 on full training set
# Print the fitted caret model: resampling results per mtry and the value selected.
rfFitupsam
## Random Forest
##
## 6500 samples
## 20 predictor
## 2 classes: 'e', 'p'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 5200, 5200, 5200, 5201, 5199
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.9480015 0.8954717
## 46 1.0000000 1.0000000
## 91 1.0000000 1.0000000
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 46.
# Unscaled variable importance of the fitted random forest.
importance <- varImp(rfFitupsam, scale = FALSE)
plot(importance)
# Predict the class of every held-out row.
predsrfprobsamp <- predict(rfFitupsam, data_test)
# Build the reference factor on the prediction's own levels: confusionMatrix()
# needs both factors to share the same level set, which as.factor() alone does
# not guarantee if a class happens to be absent from the test rows.
confusionMatrix(
  predsrfprobsamp,
  factor(data_test$CLASS, levels = levels(predsrfprobsamp))
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction e p
## e 841 0
## p 0 783
##
## Accuracy : 1
## 95% CI : (0.9977, 1)
## No Information Rate : 0.5179
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.5179
## Detection Rate : 0.5179
## Detection Prevalence : 0.5179
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : e
##