Baca Data

# Make the folder containing playtennis.csv the working directory, because the
# read.csv() call below uses a relative file name.
# NOTE(review): this is an absolute, machine-specific path; change it (or start
# R from the project folder) before running on another computer.
setwd("D:/BARU/Damin2016/Naive bayes")

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.3
library(reshape)
## Warning: package 'reshape' was built under R version 3.1.3
library(klaR)
## Warning: package 'klaR' was built under R version 3.1.3
## Loading required package: MASS
library(plyr)
## Warning: package 'plyr' was built under R version 3.1.3
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:reshape':
## 
##     rename, round_any
# Load the play-tennis data set from the working directory.
# stringsAsFactors = TRUE is spelled out because the default changed to FALSE
# in R 4.0.0; the naive Bayes fit further down needs the class column (Play)
# and the categorical predictors to be factors, as they were under old R.
data <- read.csv("playtennis.csv", header = TRUE, sep = ",",
                 stringsAsFactors = TRUE)

data
##    id   Outlook  Temp Humidity  Windy Play
## 1  a     Sunny   Hot     High  FALSE   No 
## 2  b     Sunny   Hot     High   TRUE   No 
## 3  c  Overcast   Hot     High  FALSE  Yes 
## 4  d     Rainy  Mild     High  FALSE  Yes 
## 5  e     Rainy  Cool   Normal  FALSE  Yes 
## 6  f     Rainy  Cool   Normal   TRUE   No 
## 7  g  Overcast  Cool   Normal   TRUE  Yes 
## 8  h     Sunny  Mild     High  FALSE   No 
## 9  i     Sunny  Cool   Normal  FALSE  Yes 
## 10 j     Rainy  Mild   Normal  FALSE  Yes 
## 11 k     Sunny  Mild   Normal   TRUE  Yes 
## 12 l  Overcast  Mild     High   TRUE  Yes 
## 13 m  Overcast   Hot   Normal  FALSE  Yes 
## 14 n     Rainy  Mild     High   TRUE   No
# The first column (id) is only a row label, so discard it and keep the
# remaining columns for modelling.
data <- data[-1]
data
##      Outlook  Temp Humidity  Windy Play
## 1     Sunny   Hot     High  FALSE   No 
## 2     Sunny   Hot     High   TRUE   No 
## 3  Overcast   Hot     High  FALSE  Yes 
## 4     Rainy  Mild     High  FALSE  Yes 
## 5     Rainy  Cool   Normal  FALSE  Yes 
## 6     Rainy  Cool   Normal   TRUE   No 
## 7  Overcast  Cool   Normal   TRUE  Yes 
## 8     Sunny  Mild     High  FALSE   No 
## 9     Sunny  Cool   Normal  FALSE  Yes 
## 10    Rainy  Mild   Normal  FALSE  Yes 
## 11    Sunny  Mild   Normal   TRUE  Yes 
## 12 Overcast  Mild     High   TRUE  Yes 
## 13 Overcast   Hot   Normal  FALSE  Yes 
## 14    Rainy  Mild     High   TRUE   No

Split data 80% Training

# Draw 80% of the row indices (rounded up) without replacement for training;
# the rows that are not drawn form the test set.
# seq_len() is used instead of 1:nrow(data) so an empty data frame yields an
# empty index vector rather than c(1, 0).
# NOTE(review): no seed is set, so the split and every number printed below
# change from run to run; call set.seed() beforehand for a reproducible split.
train.ind <- sample(seq_len(nrow(data)), ceiling(nrow(data) * 0.8),
                    replace = FALSE)

# Fit the naive Bayes classifier: Play is the response, every other column is
# a predictor.
nb.res <- NaiveBayes(Play ~ ., data = data[train.ind, ])
# Conditional probability tables, one per predictor (rows = class).
nb.res$tables
## $Outlook
##         
## grouping Overcast     Rainy     Sunny 
##     No   0.0000000 0.4000000 0.6000000
##     Yes  0.4285714 0.2857143 0.2857143
## 
## $Temp
##         
## grouping     Cool       Hot      Mild 
##     No   0.2000000 0.4000000 0.4000000
##     Yes  0.2857143 0.1428571 0.5714286
## 
## $Humidity
##         
## grouping     High    Normal 
##     No   0.8000000 0.2000000
##     Yes  0.4285714 0.5714286
## 
## $Windy
##         
## grouping    FALSE      TRUE 
##     No   0.4000000 0.6000000
##     Yes  0.5714286 0.4285714
# Accuracy on the training rows: predict them back, cross-tabulate actual
# against predicted class, and take the share on the diagonal.
nb.pred <- predict(nb.res, newdata = data[train.ind, ])
confusion.mat <- table(data$Play[train.ind], nb.pred$class)
sum(diag(confusion.mat)) / sum(confusion.mat)
## [1] 0.9166667
# Predict the held-out rows, i.e. every row not selected for training.
nb.pred <- predict(nb.res, newdata = data[-train.ind, ])

# Number of test cases that received a prediction.
length(nb.pred$class)
## [1] 2
# Raw accuracy on the test rows: correctly classified cases / all cases.
confusion.mat <- table(data$Play[-train.ind], nb.pred$class)
sum(diag(confusion.mat)) / sum(confusion.mat)
## [1] 1

Nomogram

# Nomogram-style dot plot: one symbol per (predictor level, class) placed at
# its conditional probability.

# Stack the per-predictor probability tables into one long data frame. The
# columns used below are grouping (class), Var.2 (predictor level), value
# (probability) and L1 (predictor name).
lcModelProbs <- melt(nb.res$tables)
data2 <- as.data.frame(lcModelProbs)

# Order the y-axis by predictor and then by decreasing probability. Each level
# appears once per class, so the ordered vector contains repeats; unique()
# keeps the first occurrence of each because factor() requires distinct levels
# (duplicates were a deprecation warning in old R and are an error now).
data2$Var.2 <- factor(
  data2$Var.2,
  levels = unique(as.character(data2[order(data2$L1, -data2$value), ]$Var.2))
)

# Base plot with the probability axis fixed to [0, 1] in steps of 0.05.
g <- ggplot(data2, aes(value, Var.2, label = grouping)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.05))

# Shape and colour both encode the class; giving them the same legend title
# makes ggplot2 draw a single combined legend.
g +
  geom_point(aes(shape = factor(grouping), colour = factor(grouping))) +
  ylab("Kategori") +
  xlab("Peluang") +
  labs(shape = "Status", colour = "Status") +
  theme_bw()

Plot variabel prediktor

# Plot the predictors one at a time; here only the Outlook table is kept.
lcModelProbs1 <- lcModelProbs[lcModelProbs$L1 == "Outlook", ]

# Within each Outlook level, pos is the running total of the probabilities
# minus half of the current one, used as the y position of each text label.
Data <- ddply(lcModelProbs1, .(Var.2), transform,
              pos = cumsum(value) - 0.5 * value)
# Show probabilities with three decimals in the labels.
Data$value <- round(Data$value, 3)

# Stacked bars (one per Outlook level, filled by class) with value labels.
p <- ggplot(Data, aes(x = Var.2, y = value)) +
  geom_bar(aes(fill = grouping), stat = "identity") +
  # ColorBrewer palette names are case-sensitive: "Blues", not "blues".
  scale_fill_brewer(palette = "Blues") +
  ylab("Peluang") +
  xlab("Outlook") +
  labs(fill = "Status") +
  geom_text(aes(label = value, y = pos), size = 3) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  theme(
    # remove the vertical grid lines
    panel.grid.major.x = element_blank(),
    # explicitly set the horizontal lines (or they will disappear too)
    panel.grid.major.y = element_line(size = .1, color = "black")
  )
p