library(caret)
## Warning: package 'caret' was built under R version 4.2.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.2
## Loading required package: lattice
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.2.1
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.2.2
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# R Markdown
# Load the Sonar data set and split it into training and validation sets ----
data(Sonar)
# Fix the RNG seed so the partition is reproducible.
set.seed(7)
# NOTE: despite its name, validationIndex holds the row indices returned by
# createDataPartition() with p = 0.80. Those rows become the training set;
# the remaining rows become the validation set.
validationIndex <- createDataPartition(
  y = Sonar$Class,
  p = 0.80,
  list = FALSE
)
training <- Sonar[validationIndex, ]
validation <- Sonar[-validationIndex, ]
# Fit a standalone random forest on the full training set ----
# Re-seed so the fitted forest is reproducible.
set.seed(7)
finalModel <- randomForest(
  Class ~ .,
  data = training,
  mtry = 2,
  ntree = 2000
)
# Show the model's confusion matrix, including the per-class error column.
finalModel$confusion
## M R class.error
## M 81 8 0.08988764
## R 19 59 0.24358974
# Tune a random forest with repeated cross-validation and summarise it ----
set.seed(7)
# 10-fold cross-validation, repeated 3 times.
# NOTE: this object shares its name with caret's trainControl() function; the
# call is namespace-qualified so the function and the object are easy to
# tell apart.
trainControl <- caret::trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)
# ntree is forwarded to the underlying randomForest() call.
fit.rf <- train(
  Class ~ .,
  data = training,
  method = "rf",
  metric = "Accuracy",
  trControl = trainControl,
  ntree = 2000
)
print(fit.rf)
## Random Forest
##
## 167 samples
## 60 predictor
## 2 classes: 'M', 'R'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 150, 150, 150, 151, 151, 150, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8446078 0.6824883
## 31 0.8283088 0.6511811
## 60 0.8083333 0.6105439
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# Print the underlying randomForest object stored inside the caret fit.
print(fit.rf[["finalModel"]])
##
## Call:
## randomForest(x = x, y = y, ntree = 2000, mtry = param$mtry)
## Type of random forest: classification
## Number of trees: 2000
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 14.37%
## Confusion matrix:
## M R class.error
## M 84 5 0.05617978
## R 19 59 0.24358974
# Plot the caret fit returned by train().
# NOTE(review): presumably shows resampled accuracy per mtry value -- confirm.
plot(fit.rf)

# Plot the standalone forest fitted directly with randomForest().
# NOTE(review): presumably shows error rate against number of trees -- confirm.
plot(finalModel)

# Make predictions on "new data" using the standalone final model ----
# Select the predictors by excluding the response column by name instead of
# relying on hard-coded column positions (1:60), so the code does not depend
# on the number or order of columns in the validation set.
finalPredictions <- predict(
  finalModel,
  newdata = validation[, names(validation) != "Class", drop = FALSE]
)
# Compare the predicted classes with the held-out true classes.
confusionMatrix(data = finalPredictions, reference = validation$Class)
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 20 4
## R 2 15
##
## Accuracy : 0.8537
## 95% CI : (0.7083, 0.9443)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 1.883e-05
##
## Kappa : 0.7036
##
## Mcnemar's Test P-Value : 0.6831
##
## Sensitivity : 0.9091
## Specificity : 0.7895
## Pos Pred Value : 0.8333
## Neg Pred Value : 0.8824
## Prevalence : 0.5366
## Detection Rate : 0.4878
## Detection Prevalence : 0.5854
## Balanced Accuracy : 0.8493
##
## 'Positive' Class : M
##
# Make predictions on the validation set using the caret-tuned model ----
# Select the predictors by excluding the response column by name instead of
# relying on hard-coded column positions (1:60), so the code does not depend
# on the number or order of columns in the validation set.
finalPredictionsCV <- predict(
  fit.rf,
  newdata = validation[, names(validation) != "Class", drop = FALSE]
)
# Compare the predicted classes with the held-out true classes.
confusionMatrix(data = finalPredictionsCV, reference = validation$Class)
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 19 4
## R 3 15
##
## Accuracy : 0.8293
## 95% CI : (0.6794, 0.9285)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 8.511e-05
##
## Kappa : 0.6555
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.8636
## Specificity : 0.7895
## Pos Pred Value : 0.8261
## Neg Pred Value : 0.8333
## Prevalence : 0.5366
## Detection Rate : 0.4634
## Detection Prevalence : 0.5610
## Balanced Accuracy : 0.8266
##
## 'Positive' Class : M
##