Implementasi Decision Tree dan Naive-Bayes

Decision Tree menggunakan package party

library(party) 
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
# Work on a copy of the built-in iris data set.
Data <- iris

# View() opens a data viewer window; only call it in an interactive session so
# the script can also be run non-interactively (Rscript, knitting).
if (interactive()) {
  View(Data)
}

# Split the data into a testing set (50 random rows) and a learning set (the
# remaining rows). The seed is fixed so the split, and therefore the fitted
# tree, is reproducible between runs.
set.seed(123)
Sample <- sample(seq_len(nrow(Data)), size = 50)
testing <- Data[Sample, ]
learning <- Data[-Sample, ]

# Fit a conditional inference tree for Species on the four measurements,
# using the learning rows only.
output.tree <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = learning
)

# Display the fitted tree.
plot(output.tree)

Naive Bayes menggunakan package e1071

library(e1071)
# Training data: 14 observations entered row by row in the order
# OUTLOOK, TEMP, HUMIDITY, WINDY, PLAY. The column names are attached while
# the matrix is built, so no separate renaming step is needed.
data.training <- as.data.frame(matrix(
  c(
    "Sunny",    "Hot",  "High",   "False", "No",
    "Sunny",    "Hot",  "High",   "True",  "No",
    "Overcast", "Hot",  "High",   "False", "Yes",
    "Rainy",    "Mild", "High",   "False", "Yes",
    "Rainy",    "Cool", "Normal", "False", "Yes",
    "Rainy",    "Cool", "Normal", "True",  "No",
    "Overcast", "Cool", "Normal", "True",  "Yes",
    "Sunny",    "Mild", "High",   "False", "No",
    "Sunny",    "Cool", "Normal", "False", "Yes",
    "Rainy",    "Mild", "Normal", "False", "Yes",
    "Sunny",    "Mild", "Normal", "True",  "Yes",
    "Overcast", "Mild", "High",   "True",  "Yes",
    "Overcast", "Hot",  "Normal", "False", "Yes",
    "Rainy",    "Mild", "High",   "True",  "No"
  ),
  ncol = 5,
  byrow = TRUE,
  dimnames = list(NULL, c("OUTLOOK", "TEMP", "HUMIDITY", "WINDY", "PLAY"))
))

# Preview the first rows.
head(data.training)
##    OUTLOOK TEMP HUMIDITY WINDY PLAY
## 1    Sunny  Hot     High False   No
## 2    Sunny  Hot     High  True   No
## 3 Overcast  Hot     High False  Yes
## 4    Rainy Mild     High False  Yes
## 5    Rainy Cool   Normal False  Yes
## 6    Rainy Cool   Normal  True   No
# Test data: a single observation holding the four predictors only
# (there is no PLAY column, since that is what gets predicted).
data.test <- as.data.frame(matrix(
  c("Sunny", "Cool", "High", "True"),
  nrow = 1,
  dimnames = list(NULL, c("OUTLOOK", "TEMP", "HUMIDITY", "WINDY"))
))
# Fit the Naive Bayes model: PLAY is the class, every other column of the
# training data is a predictor.
model <- naiveBayes(PLAY ~ ., data = data.training)

# Show the a-priori and conditional probability tables of the fitted model.
print(model)
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##        No       Yes 
## 0.3571429 0.6428571 
## 
## Conditional probabilities:
##      OUTLOOK
## Y      Overcast     Rainy     Sunny
##   No  0.0000000 0.4000000 0.6000000
##   Yes 0.4444444 0.3333333 0.2222222
## 
##      TEMP
## Y          Cool       Hot      Mild
##   No  0.2000000 0.4000000 0.4000000
##   Yes 0.3333333 0.2222222 0.4444444
## 
##      HUMIDITY
## Y          High    Normal
##   No  0.8000000 0.2000000
##   Yes 0.3333333 0.6666667
## 
##      WINDY
## Y         False      True
##   No  0.4000000 0.6000000
##   Yes 0.6666667 0.3333333
# Classify the test observation with the fitted model and show the
# predicted class.
predict_result <- predict(model, data.test)
print(predict_result)
## [1] No
## Levels: No Yes

Naive Bayes Manually

# Build the cross-tabulations; the full training data is printed first for
# reference.
data.training
##     OUTLOOK TEMP HUMIDITY WINDY PLAY
## 1     Sunny  Hot     High False   No
## 2     Sunny  Hot     High  True   No
## 3  Overcast  Hot     High False  Yes
## 4     Rainy Mild     High False  Yes
## 5     Rainy Cool   Normal False  Yes
## 6     Rainy Cool   Normal  True   No
## 7  Overcast Cool   Normal  True  Yes
## 8     Sunny Mild     High False   No
## 9     Sunny Cool   Normal False  Yes
## 10    Rainy Mild   Normal False  Yes
## 11    Sunny Mild   Normal  True  Yes
## 12 Overcast Mild     High  True  Yes
## 13 Overcast  Hot   Normal False  Yes
## 14    Rainy Mild     High  True   No
# Contingency tables of each predictor (rows) against PLAY (columns).
op <- table(data.training[["OUTLOOK"]], data.training[["PLAY"]])
tp <- table(data.training[["TEMP"]], data.training[["PLAY"]])
hp <- table(data.training[["HUMIDITY"]], data.training[["PLAY"]])
wp <- table(data.training[["WINDY"]], data.training[["PLAY"]])

# Counts of each PLAY class, converted to a one-column matrix so that it can
# be indexed as play[row, 1].
play <- as.matrix(table(data.training[["PLAY"]]))

Membuat Proporsi

# Class-conditional probabilities P(value | PLAY): every cell of a
# cross-table divided by the total of its column (its PLAY class).
# Cells are selected by level name rather than by position, so a change in
# the level ordering raises a "subscript out of bounds" error instead of
# silently picking the wrong cell.
p_outlook <- prop.table(op, margin = 2)
p_windy <- prop.table(wp, margin = 2)
p_humidity <- prop.table(hp, margin = 2)
p_temp <- prop.table(tp, margin = 2)

# OUTLOOK given PLAY
pon <- p_outlook["Overcast", "No"]
poy <- p_outlook["Overcast", "Yes"]
prn <- p_outlook["Rainy", "No"]
pry <- p_outlook["Rainy", "Yes"]
psn <- p_outlook["Sunny", "No"]
psy <- p_outlook["Sunny", "Yes"]

# WINDY given PLAY
pfn <- p_windy["False", "No"]
ptn <- p_windy["True", "No"]
pfy <- p_windy["False", "Yes"]
pty <- p_windy["True", "Yes"]

# HUMIDITY given PLAY
phy <- p_humidity["High", "Yes"]
pny <- p_humidity["Normal", "Yes"]
phn <- p_humidity["High", "No"]
pnn <- p_humidity["Normal", "No"]

# TEMP given PLAY (each value is computed once; the original repeated these
# six assignments a second time with identical right-hand sides)
pcn <- p_temp["Cool", "No"]
pcy <- p_temp["Cool", "Yes"]
phtn <- p_temp["Hot", "No"]
phty <- p_temp["Hot", "Yes"]
pmn <- p_temp["Mild", "No"]
pmy <- p_temp["Mild", "Yes"]

# Prior class probabilities P(PLAY = Yes) and P(PLAY = No).
yplay <- play["Yes", 1] / sum(play[, 1])
nplay <- play["No", 1] / sum(play[, 1])

# Row-wise proportions: each cell divided by the total of its row, i.e. the
# share of No / Yes within one predictor value.

# outlook (row 1 to row 3 of op)
povn <- prop.table(op, margin = 1)[1, 1]
povy <- prop.table(op, margin = 1)[1, 2]
prnn <- prop.table(op, margin = 1)[2, 1]
prny <- prop.table(op, margin = 1)[2, 2]
psnn <- prop.table(op, margin = 1)[3, 1]
psny <- prop.table(op, margin = 1)[3, 2]

# windy (rows 1 and 2 of wp)
pfln <- prop.table(wp, margin = 1)[1, 1]
ptrn <- prop.table(wp, margin = 1)[2, 1]
pfly <- prop.table(wp, margin = 1)[1, 2]
ptry <- prop.table(wp, margin = 1)[2, 2]

# humidity (rows 1 and 2 of hp)
phgy <- prop.table(hp, margin = 1)[1, 2]
pnry <- prop.table(hp, margin = 1)[2, 2]
phgn <- prop.table(hp, margin = 1)[1, 1]
pnrn <- prop.table(hp, margin = 1)[2, 1]

# temp: show the TEMP x PLAY table before computing its row proportions
tp
##       
##        No Yes
##   Cool  1   3
##   Hot   2   2
##   Mild  2   4
# Row-wise proportions for temp: each cell of tp divided by its row total.
pcln <- prop.table(tp, margin = 1)[1, 1]
pcly <- prop.table(tp, margin = 1)[1, 2]
photn <- prop.table(tp, margin = 1)[2, 1]
photy <- prop.table(tp, margin = 1)[2, 2]
pmln <- prop.table(tp, margin = 1)[3, 1]
pmly <- prop.table(tp, margin = 1)[3, 2]

Menghitung Nilai Likelihood

# Likelihood of each class for the observation
# (OUTLOOK = Sunny, TEMP = Cool, HUMIDITY = High, WINDY = True):
# the product of the class-conditional probabilities and the class prior.
lklhd_no <- psn *  # Sunny given No
  pcn *            # Cool given No
  phn *            # High given No
  ptn *            # True given No
  nplay            # prior of No
lklhd_yes <- psy * # Sunny given Yes
  pcy *            # Cool given Yes
  phy *            # High given Yes
  pty *            # True given Yes
  yplay            # prior of Yes

# Likelihood for PLAY = No
lklhd_no
##         No 
## 0.02057143
# Likelihood for PLAY = Yes
lklhd_yes
##         Yes 
## 0.005291005