Random Forest Classifier

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(mlbench)

## Warning: package 'mlbench' was built under R version 4.4.3

# Load the BreastCancer data set from mlbench and inspect the column types
# before any preprocessing.
data(list = "BreastCancer", package = "mlbench")
str(object = BreastCancer)

## 'data.frame':    699 obs. of  11 variables:
##  $ Id             : chr  "1000025" "1002945" "1015425" "1016277" ...
##  $ Cl.thickness   : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 5 5 3 6 4 8 1 2 2 4 ...
##  $ Cell.size      : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 4 1 8 1 10 1 1 1 2 ...
##  $ Cell.shape     : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 4 1 8 1 10 1 2 1 1 ...
##  $ Marg.adhesion  : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 5 1 1 3 8 1 1 1 1 ...
##  $ Epith.c.size   : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 2 7 2 3 2 7 2 2 2 2 ...
##  $ Bare.nuclei    : Factor w/ 10 levels "1","2","3","4",..: 1 10 2 4 1 10 10 1 1 1 ...
##  $ Bl.cromatin    : Factor w/ 10 levels "1","2","3","4",..: 3 3 3 3 3 9 3 3 1 2 ...
##  $ Normal.nucleoli: Factor w/ 10 levels "1","2","3","4",..: 1 2 1 7 1 7 1 1 1 1 ...
##  $ Mitoses        : Factor w/ 9 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 5 1 ...
##  $ Class          : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...

# Show the outcome labels; the first level is what caret later treats as the
# default "positive" class.
levels(BreastCancer$Class)

## [1] "benign"    "malignant"

# Summarise every column of the raw data to check level counts and spot
# missing values before imputation.
summary(BreastCancer)

##       Id             Cl.thickness   Cell.size     Cell.shape  Marg.adhesion
##  Length:699         1      :145   1      :384   1      :353   1      :407  
##  Class :character   5      :130   10     : 67   2      : 59   2      : 58  
##  Mode  :character   3      :108   3      : 52   10     : 58   3      : 58  
##                     4      : 80   2      : 45   3      : 56   10     : 55  
##                     10     : 69   4      : 40   4      : 44   4      : 33  
##                     2      : 50   5      : 30   5      : 34   8      : 25  
##                     (Other):117   (Other): 81   (Other): 95   (Other): 63  
##   Epith.c.size  Bare.nuclei   Bl.cromatin  Normal.nucleoli    Mitoses   
##  2      :386   1      :402   2      :166   1      :443     1      :579  
##  3      : 72   10     :132   3      :165   10     : 61     2      : 35  
##  4      : 48   2      : 30   1      :152   3      : 44     3      : 33  
##  1      : 47   5      : 30   7      : 73   2      : 36     10     : 14  
##  6      : 41   3      : 28   4      : 40   8      : 24     4      : 12  
##  5      : 39   (Other): 61   5      : 34   6      : 22     7      :  9  
##  (Other): 66   NA's   : 16   (Other): 69   (Other): 69     (Other): 17  
##        Class    
##  benign   :458  
##  malignant:241  
##                 
##                 
##                 
##                 
##

library(mice)

## Warning: package 'mice' was built under R version 4.4.3

## 
## Attaching package: 'mice'

## The following object is masked from 'package:stats':
## 
##     filter

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

library(caret)

## Warning: package 'caret' was built under R version 4.4.3

## Loading required package: ggplot2

## Loading required package: lattice

# Impute missing values using only the nine predictor columns (2:10); Id
# (column 1) and Class (column 11) are kept out of the imputation model.
# mice draws random values, so a fixed seed is passed to make the imputed
# data -- and everything fitted on it -- reproducible between runs.
# `printFlag` is spelled out in full rather than relying on partial matching.
dataset_impute <- mice(BreastCancer[, 2:10], printFlag = FALSE, seed = 150)

# Rebuild the data set as Class followed by the first completed data set;
# the Id column is dropped because it carries no predictive information.
BreastCancer <- cbind(
  BreastCancer[, 11, drop = FALSE],
  mice::complete(dataset_impute, 1)
)

# Re-check the column summaries after imputation to confirm that no NA's
# remain and that Class is now the first column.
summary(BreastCancer)

##        Class      Cl.thickness   Cell.size     Cell.shape  Marg.adhesion
##  benign   :458   1      :145   1      :384   1      :353   1      :407  
##  malignant:241   5      :130   10     : 67   2      : 59   2      : 58  
##                  3      :108   3      : 52   10     : 58   3      : 58  
##                  4      : 80   2      : 45   3      : 56   10     : 55  
##                  10     : 69   4      : 40   4      : 44   4      : 33  
##                  2      : 50   5      : 30   5      : 34   8      : 25  
##                  (Other):117   (Other): 81   (Other): 95   (Other): 63  
##   Epith.c.size  Bare.nuclei   Bl.cromatin  Normal.nucleoli    Mitoses   
##  2      :386   1      :413   2      :166   1      :443     1      :579  
##  3      : 72   10     :133   3      :165   10     : 61     2      : 35  
##  4      : 48   2      : 31   1      :152   3      : 44     3      : 33  
##  1      : 47   5      : 30   7      : 73   2      : 36     10     : 14  
##  6      : 41   3      : 28   4      : 40   8      : 24     4      : 12  
##  5      : 39   8      : 21   5      : 34   6      : 22     7      :  9  
##  (Other): 66   (Other): 43   (Other): 69   (Other): 69     (Other): 17

library(caTools)

## Warning: package 'caTools' was built under R version 4.4.3

# Fix the RNG state so the train/test partition is reproducible.
set.seed(150)

# sample.split() expects the vector of labels, not the whole data frame.
# Given a data frame it returns one flag per *column* (10 here), which
# subset() then silently recycles over the rows, producing a fixed periodic
# pattern instead of a random split. Passing the Class column yields one flag
# per row and keeps the benign/malignant ratio similar in both partitions.
# Metrics recorded from earlier runs were produced with the old split and may
# differ after re-running.
split <- sample.split(BreastCancer$Class, SplitRatio = 0.7)
training_set <- subset(BreastCancer, split)
test_set <- subset(BreastCancer, !split)

# Rows and columns of the training partition (sanity check on the split).
dim(training_set)

## [1] 490  10

# Rows and columns of the held-out test partition.
dim(test_set)

## [1] 209  10

# Predictor-only copy of the test partition: every column except the Class
# label, so the models are scored without access to the true outcome.
is_predictor <- names(test_set) != "Class"
topredict_set <- test_set[, is_predictor, drop = FALSE]
dim(topredict_set)

## [1] 209   9

library(randomForest)

## Warning: package 'randomForest' was built under R version 4.4.3

## randomForest 4.7-1.2

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following object is masked from 'package:ggplot2':
## 
##     margin

# Fit a five-tree random forest on the training rows, keeping the variable
# importance measures, then score the held-out predictors.
model_rf <- randomForest(
  Class ~ .,
  data = training_set,
  ntree = 5,
  importance = TRUE
)
preds_rf <- predict(model_rf, newdata = topredict_set)

# Cross-tabulate predictions (rows) against the true labels (columns); the
# enclosing parentheses print the table as it is assigned.
(conf_matrix_forest <- table(preds_rf, test_set$Class))

##            
## preds_rf    benign malignant
##   benign       125         2
##   malignant     10        72

# Report test-set performance of the random forest. Without `positive`,
# caret uses the first factor level ("benign") as the positive class, so
# sensitivity would describe benign cases. Setting it explicitly makes
# sensitivity measure how many malignant cases are detected.
confusionMatrix(conf_matrix_forest, positive = "malignant")

## Confusion Matrix and Statistics
## 
##            
## preds_rf    benign malignant
##   benign       125         2
##   malignant     10        72
##                                         
##                Accuracy : 0.9426        
##                  95% CI : (0.9019, 0.97)
##     No Information Rate : 0.6459        
##     P-Value [Acc > NIR] : < 2e-16       
##                                         
##                   Kappa : 0.8775        
##                                         
##  Mcnemar's Test P-Value : 0.04331       
##                                         
##             Sensitivity : 0.9259        
##             Specificity : 0.9730        
##          Pos Pred Value : 0.9843        
##          Neg Pred Value : 0.8780        
##              Prevalence : 0.6459        
##          Detection Rate : 0.5981        
##    Detection Prevalence : 0.6077        
##       Balanced Accuracy : 0.9494        
##                                         
##        'Positive' Class : benign        
##

library(rpart)

## Warning: package 'rpart' was built under R version 4.4.3

# Fit a single classification tree on the same training rows for comparison
# with the random forest, and predict class labels for the held-out rows.
model_dtree <- rpart(formula = Class ~ ., data = training_set)
preds_dtree <- predict(
  model_dtree,
  newdata = topredict_set,
  type = "class"
)

# Cross-tabulate predictions (rows) against the true labels (columns); the
# enclosing parentheses print the table as it is assigned.
(conf_matrix_dtree <- table(preds_dtree, test_set$Class))

##            
## preds_dtree benign malignant
##   benign       127         5
##   malignant      8        69

# Report test-set performance of the decision tree. Without `positive`,
# caret uses the first factor level ("benign") as the positive class, so
# sensitivity would describe benign cases. Setting it explicitly makes
# sensitivity measure how many malignant cases are detected.
confusionMatrix(conf_matrix_dtree, positive = "malignant")

## Confusion Matrix and Statistics
## 
##            
## preds_dtree benign malignant
##   benign       127         5
##   malignant      8        69
##                                          
##                Accuracy : 0.9378         
##                  95% CI : (0.896, 0.9665)
##     No Information Rate : 0.6459         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.8652         
##                                          
##  Mcnemar's Test P-Value : 0.5791         
##                                          
##             Sensitivity : 0.9407         
##             Specificity : 0.9324         
##          Pos Pred Value : 0.9621         
##          Neg Pred Value : 0.8961         
##              Prevalence : 0.6459         
##          Detection Rate : 0.6077         
##    Detection Prevalence : 0.6316         
##       Balanced Accuracy : 0.9366         
##                                          
##        'Positive' Class : benign         
##

Random Forest Classifier

naema winde

2025-03-12

R Markdown

Including Plots