Load Libraries for the Random Forest Project
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.2.2
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
library(caret)
## Warning: package 'caret' was built under R version 4.2.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## Loading required package: lattice
library(ggplot2)
library(rpart)
## Warning: package 'rpart' was built under R version 4.2.2
Read in the dataset and inspect its structure (Lset data)
# Import the raw CSV into a data frame, then print a compact summary of
# every column (type plus leading values) to sanity-check the import.
Lset <- read.csv(file = "D:/otherset.csv")
str(object = Lset)
## 'data.frame': 10 obs. of 4 variables:
## $ Left : int 1 0 1 0 0 0 0 1 1 0
## $ Right: int 45 0 92 18 26 48 41 52 64 80
## $ Up : int 24 26 32 41 80 76 92 39 46 50
## $ Down : int 100 69 46 24 0 32 86 71 65 48
# Seed the RNG so the random train/validation partition is reproducible.
set.seed(seed = 1234)
# Recode the integer 0/1 response as a factor so the forest is fit as a
# classifier rather than a regression.
Lset[["Left"]] <- as.factor(Lset[["Left"]])
Create an 80%/20% split into training and validation datasets
# createDataPartition() returns the row numbers drawn for the 80% share, so
# despite its name validationIndex holds the TRAINING rows; the remaining
# rows form the validation set.
validationIndex <- createDataPartition(y = Lset$Left, p = 0.80, list = FALSE)
Ltrain <- Lset[validationIndex, ]
validation <- Lset[-validationIndex, ]
# Re-seed right before fitting so the bootstrap samples are reproducible.
set.seed(seed = 123456)
# mtry = 3 offers all three predictors (Right, Up, Down) at every split.
rf <- randomForest(formula = Left ~ ., data = Ltrain, mtry = 3, ntree = 500)
# Auto-print the fitted forest: call, OOB error rate and confusion matrix.
rf
##
## Call:
## randomForest(formula = Left ~ ., data = Ltrain, mtry = 3, ntree = 500)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 44.44%
## Confusion matrix:
## 0 1 class.error
## 0 3 2 0.4
## 1 2 2 0.5
Make a prediction
# Score every row of Lset with the forest fitted above. The model object is
# `rf`; the previous reference to `m_rf` named an object that is never
# created in this script.
# NOTE: Lset contains the rows the forest was trained on (Ltrain), so this
# table is resubstitution accuracy and is optimistic; score `validation`
# for an estimate on held-out rows.
pred <- predict(rf, Lset)
# Cross-tabulate predicted class (rows) against the true class (columns).
table(pred, Lset$Left)
##
## pred 0 1
## 0 6 0
## 1 0 4
# Plot the fitted forest's error rates against the number of trees. The
# model object is `rf`; `m_rf` is not defined anywhere in this script.
plot(rf)

Random Forest Project with Iris Dataset
Load packages
library(rpart)
library(caret)
library(ggplot2)
library(randomForest)
## Load Iris Dataset
# Import the iris CSV and recode the class label as a factor so the forest
# is fit as a classifier.
Irset <- read.csv(file = "D:/irisr.csv")
Irset[["Species"]] <- as.factor(Irset[["Species"]])
# Seed the RNG so the random train/validation partition is reproducible.
set.seed(seed = 1000)
# Compact per-column summary to confirm types after the conversion.
str(object = Irset)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "Setosa","Versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Preview the first six rows of the iris data.
head(x = Irset, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 Setosa
## 2 4.9 3.0 1.4 0.2 Setosa
## 3 4.7 3.2 1.3 0.2 Setosa
## 4 4.6 3.1 1.5 0.2 Setosa
## 5 5.0 3.6 1.4 0.2 Setosa
## 6 5.4 3.9 1.7 0.4 Setosa
Create an 80%/20% split of the dataset for training and validation
# createDataPartition() returns the row numbers drawn for the 80% share, so
# val_base indexes the TRAINING rows; the remaining rows are held out.
val_base <- createDataPartition(y = Irset$Species, p = 0.80, list = FALSE)
ir_train <- Irset[val_base, ]
valid <- Irset[-val_base, ]
# Re-seed right before fitting so the bootstrap samples are reproducible.
set.seed(seed = 1000)
# mtry = 4 offers all four measurements at every split; importance = TRUE
# stores the variable-importance measures queried later.
ir_rf <- randomForest(
  formula = Species ~ .,
  data = ir_train,
  mtry = 4,
  importance = TRUE,
  ntree = 500
)
# Auto-print the fitted forest: call, OOB error rate and confusion matrix.
ir_rf
##
## Call:
## randomForest(formula = Species ~ ., data = ir_train, mtry = 4, importance = TRUE, ntree = 500)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 4
##
## OOB estimate of error rate: 4.17%
## Confusion matrix:
## Setosa Versicolor Virginica class.error
## Setosa 40 0 0 0.000
## Versicolor 0 38 2 0.050
## Virginica 0 3 37 0.075
## Evaluate the importance
# Variable-importance table for the iris forest: per-class and overall
# MeanDecreaseAccuracy plus MeanDecreaseGini for each predictor.
importance(x = ir_rf)
## Setosa Versicolor Virginica MeanDecreaseAccuracy
## Sepal.Length 0.00000 -0.8245351 3.678355 3.179503
## Sepal.Width 0.00000 -5.0667707 6.164603 1.975852
## Petal.Length 24.36507 33.9329757 29.518567 35.975324
## Petal.Width 22.47154 30.0467381 31.948066 33.638813
## MeanDecreaseGini
## Sepal.Length 0.7272981
## Sepal.Width 1.0726318
## Petal.Length 40.0725004
## Petal.Width 37.4144365
Make a Prediction
# Score every row of Irset with the forest fitted above. The model object
# is `ir_rf`; the previous reference to `irm_rf` named an object that is
# never created in this script.
# NOTE: Irset contains the rows the forest was trained on (ir_train), so
# this table is optimistic; score `valid` for an estimate on held-out rows.
pred_iris <- predict(ir_rf, Irset)
# Cross-tabulate predicted species (rows) against true species (columns).
table(pred_iris, Irset$Species)
##
## pred_iris Setosa Versicolor Virginica
## Setosa 50 0 0
## Versicolor 0 49 2
## Virginica 0 1 48
Visualize the Data and the Model Error
# Scatterplot matrix of all columns of the iris data frame.
plot(Irset)

# Plot the fitted forest's error rates against the number of trees. The
# model object is `ir_rf`; `irm_rf` is not defined anywhere in this script.
plot(ir_rf)
