Decision Tree
x0<-c(4.3,3.9,2.7,6.6,6.5,2.7)
x1<-c(4.9,6.1,4.8,4.4,2.9,6.7)
x2<-c(4.1,5.9,4.1,4.5,4.7,4.2)
x3<-c(4.7,5.5,5.0,3.9,4.6,5.3)
x4<-c(5.5,5.9,5.6,5.9,6.1,4.8)
y<-as.factor(c(0,0,0,1,1,1))
#y<-(c(0,0,0,1,1,1))
id<-1:6
(data<-data.frame(x0,x1,x2,x3,x4,y))
## x0 x1 x2 x3 x4 y
## 1 4.3 4.9 4.1 4.7 5.5 0
## 2 3.9 6.1 5.9 5.5 5.9 0
## 3 2.7 4.8 4.1 5.0 5.6 0
## 4 6.6 4.4 4.5 3.9 5.9 1
## 5 6.5 2.9 4.7 4.6 6.1 1
## 6 2.7 6.7 4.2 5.3 4.8 1
library(rpart)
fit <- rpart(y~x0+x1+x2+x3+x4,minsplit=1)
library(rpart.plot)
## Warning: 套件 'rpart.plot' 是用 R 版本 4.3.1 來建造的
#require(rpart.plot)
prp(fit, # 模型
faclen=0, # 呈現的變數不要縮寫
fallen.leaves=TRUE, # 讓樹枝以垂直方式呈現
shadow.col="gray", # 最下面的節點塗上陰影
# number of correct classifications / number of observations in that node
extra=2)

x0.2<-x0
x0.2[2]<-6.5
x1.2<-x1
x1.2[2]<-4.1
fit2 <- rpart(y~x0.2+x1.2+x2+x3+x4,minsplit=1)
prp(fit2, # 模型
faclen=0, # 呈現的變數不要縮寫
fallen.leaves=TRUE, # 讓樹枝以垂直方式呈現
shadow.col="gray", # 最下面的節點塗上陰影
# number of correct classifications / number of observations in that node
extra=2)

set.seed(1234)
id.boot1<-sample(id,6,replace=T)
id.boot2<-sample(id,6,replace=T)
id.boot3<-sample(id,6,replace=T)
id.boot4<-sample(id,6,replace=T)
id.boot5<-sample(id,6,replace=T)
var.s1<-sample(0:4,2)
var.s2<-sample(0:4,2)
var.s3<-sample(0:4,2)
var.s4<-sample(0:4,2)
var.s5<-sample(0:4,2)
cbind(id.boot1,id.boot2,id.boot3,id.boot4,id.boot5)
## id.boot1 id.boot2 id.boot3 id.boot4 id.boot5
## [1,] 4 5 6 4 5
## [2,] 2 6 6 4 2
## [3,] 6 4 4 5 5
## [4,] 5 2 6 4 2
## [5,] 4 6 6 3 6
## [6,] 1 2 6 4 3
cbind(var.s1,var.s2,var.s3,var.s4,var.s5)
## var.s1 var.s2 var.s3 var.s4 var.s5
## [1,] 3 3 0 3 1
## [2,] 4 2 2 4 2
fit.b1<- rpart(y~x3+x4,data=data[id.boot1,],minsplit=1)
fit.b2<- rpart(y~x3+x2,data=data[id.boot2,],minsplit=1)
fit.b3<- rpart(y~x2+x0,data=data[id.boot3,],minsplit=1)
fit.b4<- rpart(y~x4+x3,data=data[id.boot4,],minsplit=1)
fit.b5<- rpart(y~x2+x1,data=data[id.boot4,],minsplit=1)
par(mfrow=c(1,5))
prp(fit.b1, # 模型
faclen=0, # 呈現的變數不要縮寫
fallen.leaves=TRUE, # 讓樹枝以垂直方式呈現
shadow.col="gray", # 最下面的節點塗上陰影
# number of correct classifications / number of observations in that node
extra=2)
prp(fit.b2, # 模型
faclen=0, # 呈現的變數不要縮寫
fallen.leaves=TRUE, # 讓樹枝以垂直方式呈現
shadow.col="gray", # 最下面的節點塗上陰影
# number of correct classifications / number of observations in that node
extra=2)
prp(fit.b3, # 模型
faclen=0, # 呈現的變數不要縮寫
fallen.leaves=TRUE, # 讓樹枝以垂直方式呈現
shadow.col="gray", # 最下面的節點塗上陰影
# number of correct classifications / number of observations in that node
extra=2)
prp(fit.b4, # 模型
faclen=0, # 呈現的變數不要縮寫
fallen.leaves=TRUE, # 讓樹枝以垂直方式呈現
shadow.col="gray", # 最下面的節點塗上陰影
# number of correct classifications / number of observations in that node
extra=2)
prp(fit.b5, # 模型
faclen=0, # 呈現的變數不要縮寫
fallen.leaves=TRUE, # 讓樹枝以垂直方式呈現
shadow.col="gray", # 最下面的節點塗上陰影
# number of correct classifications / number of observations in that node
extra=2)

Random Forest
#install.packages("randomForest")
library(randomForest)
## Warning: 套件 'randomForest' 是用 R 版本 4.3.1 來建造的
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
mod.rf <-randomForest(y~.,data=data,ntree=800)
mod.rf
##
## Call:
## randomForest(formula = y ~ ., data = data, ntree = 800)
## Type of random forest: classification
## Number of trees: 800
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 33.33%
## Confusion matrix:
## 0 1 class.error
## 0 2 1 0.3333333
## 1 1 2 0.3333333
plot(mod.rf)

test.data<-data.frame(x0=c(2.8,2.7,6),x1=c(3,6.2,2),x2=c(2,4.3,6),x3=c(6,5.3,1),x4=c(3,5.5,5))
(r<-predict(mod.rf,test.data,type="prob"))
## 0 1
## 1 0.62875 0.37125
## 2 0.60125 0.39875
## 3 0.20250 0.79750
## attr(,"class")
## [1] "matrix" "array" "votes"