library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(mlbench)
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin

This example trains a random forest classifier on the Sonar dataset: a standalone randomForest model fit to all of the training data, a cross-validated model tuned with caret, and an evaluation of both on a 20% held-out validation set.

# Load the Sonar data and split off 20% as a held-out validation set
data(Sonar)
set.seed(7)
validationIndex <- createDataPartition(Sonar$Class, p = 0.80, list = FALSE)
validation <- Sonar[-validationIndex, ]
training <- Sonar[validationIndex, ]
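
createDataPartition() stratifies the split by class, so the M/R proportions in both subsets should closely track the full dataset. A quick sanity check (not part of the original output) could look like this:

# Compare class proportions in the full data, the training set, and the validation set
round(rbind(
  full       = prop.table(table(Sonar$Class)),
  training   = prop.table(table(training$Class)),
  validation = prop.table(table(validation$Class))
), 3)
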
# Create a standalone model using all of the training data
set.seed(7)
finalModel <- randomForest(Class ~ ., data = training, mtry = 2, ntree = 2000)
finalModel$confusion
##    M  R class.error
## M 81  8  0.08988764
## R 19 59  0.24358974
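
The confusion matrix above comes from the out-of-bag predictions. The fitted randomForest object also records variable importance, so a minimal sketch for inspecting the most influential of the 60 predictors (output not shown) would be:

# Rank predictors by mean decrease in Gini impurity (the randomForest default)
imp <- importance(finalModel)
head(imp[order(imp[, 1], decreasing = TRUE), , drop = FALSE], 10)
varImpPlot(finalModel)   # the same ranking as a dot chart
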
# Train a cross-validated model with caret (10-fold CV, repeated 3 times) and summarize it
set.seed(7)
trainControl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
fit.rf <- train(Class ~ ., data = training, method = "rf", metric = "Accuracy",
                trControl = trainControl, ntree = 2000)
print(fit.rf)
## Random Forest 
## 
## 167 samples
##  60 predictor
##   2 classes: 'M', 'R' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 150, 150, 150, 151, 151, 150, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##    2    0.8446078  0.6824883
##   31    0.8283088  0.6511811
##   60    0.8083333  0.6105439
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
print(fit.rf$finalModel)
## 
## Call:
##  randomForest(x = x, y = y, ntree = 2000, mtry = param$mtry) 
##                Type of random forest: classification
##                      Number of trees: 2000
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 14.37%
## Confusion matrix:
##    M  R class.error
## M 84  5  0.05617978
## R 19 59  0.24358974
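
caret's default grid only tried mtry = 2, 31 and 60. If you wanted a finer sweep around the winning value, a custom grid can be passed through tuneGrid; the candidate values below are illustrative, and the sketch reuses the trainControl object defined above:

# Re-tune mtry over a custom grid rather than caret's default three values
set.seed(7)
tuneGrid <- expand.grid(mtry = c(1, 2, 3, 4, 6, 8))
fit.rf.grid <- train(Class ~ ., data = training, method = "rf", metric = "Accuracy",
                     tuneGrid = tuneGrid, trControl = trainControl, ntree = 2000)
print(fit.rf.grid)
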
# Plot cross-validated accuracy against the candidate mtry values
plot(fit.rf)

# Plot OOB error for the standalone model as trees are added
plot(finalModel)

# Make predictions on the held-out validation data using the standalone model
finalPredictions <- predict(finalModel, validation[, 1:60])
confusionMatrix(finalPredictions, validation$Class)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  M  R
##          M 20  4
##          R  2 15
##                                           
##                Accuracy : 0.8537          
##                  95% CI : (0.7083, 0.9443)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 1.883e-05       
##                                           
##                   Kappa : 0.7036          
##                                           
##  Mcnemar's Test P-Value : 0.6831          
##                                           
##             Sensitivity : 0.9091          
##             Specificity : 0.7895          
##          Pos Pred Value : 0.8333          
##          Neg Pred Value : 0.8824          
##              Prevalence : 0.5366          
##          Detection Rate : 0.4878          
##    Detection Prevalence : 0.5854          
##       Balanced Accuracy : 0.8493          
##                                           
##        'Positive' Class : M               
## 
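
confusionMatrix() returns an object, not just printed text, so the headline statistics can be extracted by name if you want to tabulate the two models programmatically. A small sketch:

# Keep the result and pull out individual statistics
cmStandalone <- confusionMatrix(finalPredictions, validation$Class)
cmStandalone$overall[c("Accuracy", "Kappa")]
cmStandalone$byClass[c("Sensitivity", "Specificity", "Balanced Accuracy")]
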
# Repeat the predictions with the cross-validated caret model
finalPredictionsCV <- predict(fit.rf, validation[, 1:60])
confusionMatrix(finalPredictionsCV, validation$Class)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  M  R
##          M 19  4
##          R  3 15
##                                           
##                Accuracy : 0.8293          
##                  95% CI : (0.6794, 0.9285)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 8.511e-05       
##                                           
##                   Kappa : 0.6555          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.8636          
##             Specificity : 0.7895          
##          Pos Pred Value : 0.8261          
##          Neg Pred Value : 0.8333          
##              Prevalence : 0.5366          
##          Detection Rate : 0.4634          
##    Detection Prevalence : 0.5610          
##       Balanced Accuracy : 0.8266          
##                                           
##        'Positive' Class : M               
##
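
Since the standalone model is the one you would keep for scoring new sonar returns, a natural (assumed, not shown in the original) final step is to persist it; both models can also return class probabilities rather than hard M/R labels. The file name here is arbitrary:

# Save the standalone model and reload it later for scoring
saveRDS(finalModel, "finalModel.rds")
finalModelReloaded <- readRDS("finalModel.rds")

# Class probabilities instead of hard class predictions
head(predict(finalModelReloaded, validation[, 1:60], type = "prob"))
head(predict(fit.rf, validation[, 1:60], type = "prob"))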