Baca Data
# Point the session at the folder against which the relative read.csv() path
# further down is resolved.
# NOTE(review): absolute, machine-specific path; setwd() errors if this folder
# does not exist on the machine running the script.
setwd("D:/BARU/Damin2016/Naive bayes")
# ggplot2: plotting functions (ggplot, geom_point, geom_bar, ...) for the figures.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.3
# reshape: melt(), used to flatten the model's probability tables.
library(reshape)
## Warning: package 'reshape' was built under R version 3.1.3
# klaR: NaiveBayes() classifier; loading it also attaches MASS (see message below).
library(klaR)
## Warning: package 'klaR' was built under R version 3.1.3
## Loading required package: MASS
# plyr: ddply(). It is attached after reshape, so its rename and round_any
# mask reshape's versions (see message below).
library(plyr)
## Warning: package 'plyr' was built under R version 3.1.3
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:reshape':
##
## rename, round_any
# Load the play-tennis data set from the working directory.
# stringsAsFactors = TRUE is spelled out because R >= 4.0 reads text columns
# as character by default, whereas the session recorded here (R 3.1.x)
# produced factors. Factors keep their full level set when rows are subset,
# so the training and test subsets taken later share one category coding.
data <- read.csv("playtennis.csv", header = TRUE, sep = ",",
                 stringsAsFactors = TRUE)
data
## id Outlook Temp Humidity Windy Play
## 1 a Sunny Hot High FALSE No
## 2 b Sunny Hot High TRUE No
## 3 c Overcast Hot High FALSE Yes
## 4 d Rainy Mild High FALSE Yes
## 5 e Rainy Cool Normal FALSE Yes
## 6 f Rainy Cool Normal TRUE No
## 7 g Overcast Cool Normal TRUE Yes
## 8 h Sunny Mild High FALSE No
## 9 i Sunny Cool Normal FALSE Yes
## 10 j Rainy Mild Normal FALSE Yes
## 11 k Sunny Mild Normal TRUE Yes
## 12 l Overcast Mild High TRUE Yes
## 13 m Overcast Hot Normal FALSE Yes
## 14 n Rainy Mild High TRUE No
# Drop the first column (the row identifier "id") so that it is not picked up
# as a predictor by the Play ~ . formula used later.
data <- data[, -1]
data
## Outlook Temp Humidity Windy Play
## 1 Sunny Hot High FALSE No
## 2 Sunny Hot High TRUE No
## 3 Overcast Hot High FALSE Yes
## 4 Rainy Mild High FALSE Yes
## 5 Rainy Cool Normal FALSE Yes
## 6 Rainy Cool Normal TRUE No
## 7 Overcast Cool Normal TRUE Yes
## 8 Sunny Mild High FALSE No
## 9 Sunny Cool Normal FALSE Yes
## 10 Rainy Mild Normal FALSE Yes
## 11 Sunny Mild Normal TRUE Yes
## 12 Overcast Mild High TRUE Yes
## 13 Overcast Hot Normal FALSE Yes
## 14 Rainy Mild High TRUE No
Split data 80% Training
# Hold-out split: draw ceiling(0.8 * n) row indices without replacement for
# training; the rows that are not drawn form the test set.
# The seed makes the random split repeatable. NOTE: the "##" outputs recorded
# below come from the original unseeded run, so a rerun may print different
# numbers.
set.seed(2016)
# seq_len() instead of 1:nrow(data): it yields an empty sequence, not c(1, 0),
# if the data frame has no rows.
train.ind <- sample(seq_len(nrow(data)), ceiling(nrow(data) * 0.8),
                    replace = FALSE)
# Fit the naive Bayes classifier: Play is the response, every other column is
# a predictor.
nb.res <- NaiveBayes(Play ~ ., data = data[train.ind, ])
# One table per predictor; rows are the classes (No/Yes) and each row holds the
# proportions of that predictor's categories within the class.
nb.res$tables
## $Outlook
##
## grouping Overcast Rainy Sunny
## No 0.0000000 0.4000000 0.6000000
## Yes 0.4285714 0.2857143 0.2857143
##
## $Temp
##
## grouping Cool Hot Mild
## No 0.2000000 0.4000000 0.4000000
## Yes 0.2857143 0.1428571 0.5714286
##
## $Humidity
##
## grouping High Normal
## No 0.8000000 0.2000000
## Yes 0.4285714 0.5714286
##
## $Windy
##
## grouping FALSE TRUE
## No 0.4000000 0.6000000
## Yes 0.5714286 0.4285714
# Accuracy on the training rows.
nb.pred <- predict(nb.res, data[train.ind, ])
confusion.mat <- table(data[train.ind, "Play"], nb.pred$class)
# Accuracy as the share of predictions equal to the actual label. Comparing
# the labels directly (rather than summing the diagonal of confusion.mat)
# stays correct even when the table is not square, e.g. when a small subset
# contains only one of the classes.
mean(as.character(nb.pred$class) == as.character(data[train.ind, "Play"]))
## [1] 0.9166667
# Accuracy on the held-out rows.
nb.pred <- predict(nb.res, data[-train.ind, ])
# Number of held-out rows that were classified.
length(nb.pred$class)
## [1] 2
confusion.mat <- table(data[-train.ind, "Play"], nb.pred$class)
# raw accuracy
mean(as.character(nb.pred$class) == as.character(data[-train.ind, "Play"]))
## [1] 1
Nomogram
# The class variable (Play) is the response Y.
# Flatten the list of per-predictor probability tables into one long table;
# the columns used below are grouping (class), Var.2 (category), value
# (probability) and L1 (predictor name).
lcModelProbs <- melt(nb.res$tables)
# Symbol-only nomogram: one point per class for every predictor category.
data2 <- as.data.frame(lcModelProbs)
# Order the categories by predictor (L1), then by descending probability.
# Every category occurs once per class, so the ordered vector contains
# repeats; unique() keeps the first occurrence of each. Passing duplicated
# levels to factor() triggered the "duplicated levels" deprecation warning in
# the recorded run and is an error in current R.
data2$Var.2 <- factor(
  data2$Var.2,
  levels = unique(as.character(data2$Var.2[order(data2$L1, -data2$value)]))
)
# Base plot: probability on a fixed 0-1 x axis with ticks every 0.05,
# category on the y axis.
g <- ggplot(data2, aes(value, Var.2, label = grouping)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.05))
# NOTE(review): the shape legend title "Species label" looks like a leftover
# from another example; confirm the intended title.
g +
  geom_point(aes(shape = factor(grouping), colour = factor(grouping))) +
  ylab("Kategori") +
  xlab("Peluang") +
  labs(shape = "Species label") +
  theme_bw()
Plot variabel prediktor
# Plot the predictors one at a time; this one shows Outlook.
lcModelProbs1 <- lcModelProbs[lcModelProbs$L1 == "Outlook", ]
# Within each category, pos is the running total of the probabilities minus
# half of the current one, i.e. the midpoint of each segment when the bars are
# stacked in row order.
# NOTE(review): this assumes ggplot stacks the segments in the same order as
# the rows; verify the label placement on the installed ggplot2 version.
Data <- ddply(lcModelProbs1, .(Var.2), transform,
              pos = cumsum(value) - 0.5 * value)
# Round to three decimals for the bar labels (done after pos is computed so
# the label positions use the unrounded values).
Data$value <- round(Data$value, 3)
# Stacked bars with the probability printed inside each segment.
# The ColorBrewer palette name is case-sensitive: "Blues", not "blues" (the
# lowercase name produced the "Unknown palette" warning in the recorded run).
p <- ggplot(Data, aes(x = Var.2, y = value)) +
  geom_bar(aes(fill = grouping), stat = "identity") +
  scale_fill_brewer(palette = "Blues") +
  ylab("Peluang") +
  xlab("Outlook") +
  labs(fill = "Status") +
  geom_text(aes(label = value, y = pos), size = 3) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  theme(
    # remove the vertical grid lines
    panel.grid.major.x = element_blank(),
    # explicitly set the horizontal lines (or they will disappear too)
    panel.grid.major.y = element_line(size = .1, color = "black")
  )
p