library(party)
# ---- Conditional inference tree (ctree) on the iris data ----
# Fix the RNG seed so the random train/test split below is repeatable.
set.seed(1234)

# Draw a label (1 or 2) for every row of iris with probabilities 0.7 / 0.3;
# rows labelled 1 form the training set, rows labelled 2 the test set.
ind <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))
trainData <- iris[ind == 1, ]
testData <- iris[ind == 2, ]

# Build a model: Species explained by the four flower measurements.
myFormula <- Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
iris_ctree <- ctree(myFormula, data = trainData)

# Check the prediction with the training data
# (rows = predicted class, columns = actual class).
table(predict(iris_ctree), trainData$Species)
# Console output recorded in the original session. It is kept as comments so
# the script parses; exact counts may differ with other R / package versions.
#>              setosa versicolor virginica
#>   setosa         40          0         0
#>   versicolor      0         37         3
#>   virginica       0          1        31

# Plot the decision tree.
plot(iris_ctree, type = "simple")

# Predict on the held-out test data.
testPred <- predict(iris_ctree, newdata = testData)

# Show the confusion matrix (rows = predicted class, columns = actual class).
table(testPred, testData$Species)
# Recorded output from the original session:
#> testPred     setosa versicolor virginica
#>   setosa         10          0         0
#>   versicolor      0         12         2
#>   virginica       0          0        14
library(randomForest)
# ---- Random forest on the iris data ----
# Fix the RNG seed so the split and the forest are repeatable.
set.seed(1234)

# Draw a label (1 or 2) for every row of iris with probabilities 0.7 / 0.3;
# rows labelled 1 form the training set, rows labelled 2 the test set.
ind <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))
trainData <- iris[ind == 1, ]
testData <- iris[ind == 2, ]

# Fit a 100-tree forest predicting Species from all other columns.
rf <- randomForest(
  Species ~ .,
  data = trainData,
  ntree = 100,
  proximity = TRUE
)

# Cross-tabulate predictions against the training labels
# (rows = predicted class, columns = actual class). Per the randomForest
# documentation, predict() without newdata returns out-of-bag predictions.
table(predict(rf), trainData$Species)
# Console output recorded in the original session. It is kept as comments so
# the script parses; exact counts may differ with other R / package versions.
#>              setosa versicolor virginica
#>   setosa         40          0         0
#>   versicolor      0         35         2
#>   virginica       0          3        32

# Print the model summary.
print(rf)
# Recorded output from the original session:
#> Call:
#>  randomForest(formula = Species ~ ., data = trainData, ntree = 100, proximity = TRUE)
#>                Type of random forest: classification
#>                      Number of trees: 100
#> No. of variables tried at each split: 2
#>
#>         OOB estimate of error rate: 4.46%
#> Confusion matrix:
#>            setosa versicolor virginica class.error
#> setosa         40          0         0  0.00000000
#> versicolor      0         35         3  0.07894737
#> virginica       0          2        32  0.05882353

# List the components stored in the fitted object.
attributes(rf)
# Recorded output from the original session:
#> $names
#>  [1] "call"            "type"            "predicted"
#>  [4] "err.rate"        "confusion"       "votes"
#>  [7] "oob.times"       "classes"         "importance"
#> [10] "importanceSD"    "localImportance" "proximity"
#> [13] "ntree"           "mtry"            "forest"
#> [16] "y"               "test"            "inbag"
#> [19] "terms"
#>
#> $class
#> [1] "randomForest.formula" "randomForest"

# Plot the fitted forest (plot method for randomForest objects).
plot(rf)

# Variable importance table and the corresponding plot.
importance(rf)
# Recorded output from the original session:
#>              MeanDecreaseGini
#> Sepal.Length         8.129141
#> Sepal.Width          1.253981
#> Petal.Length        31.235275
#> Petal.Width         33.198115
varImpPlot(rf)

# Predict on the held-out test data and show the confusion matrix
# (rows = predicted class, columns = actual class).
irisPred <- predict(rf, newdata = testData)
table(irisPred, testData$Species)
# Recorded output from the original session:
#> irisPred     setosa versicolor virginica
#>   setosa         10          0         0
#>   versicolor      0         12         2
#>   virginica       0          0        14

# Margin plot for the test set. The votes stored in rf belong to the training
# rows, so margin(rf, testData$Species) would pair training votes with test
# labels of a different length. Compute the vote fractions for the test rows
# and take the margin of those instead.
testVotes <- predict(rf, newdata = testData, type = "vote", norm.votes = TRUE)
plot(margin(testVotes, testData$Species))