##########################################
# Naïve Bayes in R
##########################################
# Base directory holding the course data. Defaults to the original location;
# set the DATAMINING_DIR environment variable to run the script elsewhere.
# The file path is built explicitly so the session's working directory is
# never changed with setwd().
data_dir <- Sys.getenv("DATAMINING_DIR", unset = "C:/datamining")

# install.packages("e1071") # one-time install of package e1071
library(e1071) # load the library that provides naiveBayes()

# Read the arrival records (comma-separated, with a header row).
# stringsAsFactors = TRUE is explicit because R >= 4.0 no longer converts
# strings to factors by default, and the outputs shown further down (for
# example the "Levels:" line of the prediction) rely on factor columns.
arrival <- read.table(
  file.path(data_dir, "data", "arrival.csv"),
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Fail early instead of silently building an all-NA test row when the file
# has fewer rows than the split below expects.
stopifnot(nrow(arrival) >= 21L)

# Define the data frames for the NB classifier: rows 1-20 are the training
# set, row 21 is the single record to classify.
traindata <- arrival[1:20, ]
testdata <- arrival[21, ]
# Print the 20 training rows.
# NOTE(review): the output below contains both "Saturday" and "saturday" in
# the day column; they are counted as separate values in the tables that
# follow. Confirm whether that is intended or the data should be normalised.
traindata
##         day season   wind   rain     class
## 1   weekday spring   none   none   on time
## 2   weekday winter   none slight   on time
## 3   weekday winter   none slight   on time
## 4   weekday winter   high  heavy      late
## 5  Saturday summer normal   none   on time
## 6   weekday autumn normal   none very late
## 7   holiday summer   high slight   on time
## 8    Sunday summer normal   none   on time
## 9   weekday winter   high  heavy very late
## 10  weekday summer   none slight   on time
## 11 saturday spring   high  heavy cancelled
## 12  weekday summer   high slight   on time
## 13 saturday winter normal   none      late
## 14  weekday summer   high   none   on time
## 15  weekday winter normal  heavy very late
## 16 Saturday autumn   high slight   on time
## 17  weekday autumn   none  heavy   on time
## 18  holiday spring normal slight   on time
## 19  weekday spring normal   none   on time
## 20  weekday spring normal slight   on time
# Print the single record to classify (row 21); its class field is blank in
# the output below.
testdata
##        day season wind  rain class
## 21 weekday winter high heavy
# Count how many training rows fall into each class level.
tprior <- table(traindata[["class"]])
tprior
## 
##           cancelled      late   on time very late 
##         0         1         2        14         3
# Turn the class counts into relative frequencies (the prior probabilities).
tprior <- prop.table(tprior)
tprior
## 
##           cancelled      late   on time very late 
##      0.00      0.05      0.10      0.70      0.15
# Cross-tabulate class (rows) against day (columns) for the training rows.
dayCounts <- table(class = traindata$class, day = traindata$day)
dayCounts
##            day
## class       holiday saturday Saturday Sunday weekday
##                   0        0        0      0       0
##   cancelled       0        1        0      0       0
##   late            0        1        0      0       1
##   on time         2        0        2      1       9
##   very late       0        0        0      0       3
# Fit a naive Bayes classifier of class on the four predictors.
model <- naiveBayes(
  class ~ day + season + wind + rain,
  data = traindata
)

# Print the fitted model (a-priori and conditional probability tables).
model
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##           cancelled      late   on time very late 
##      0.00      0.05      0.10      0.70      0.15 
## 
## Conditional probabilities:
##            day
## Y              holiday   saturday   Saturday     Sunday    weekday
##                                                                   
##   cancelled 0.00000000 1.00000000 0.00000000 0.00000000 0.00000000
##   late      0.00000000 0.50000000 0.00000000 0.00000000 0.50000000
##   on time   0.14285714 0.00000000 0.14285714 0.07142857 0.64285714
##   very late 0.00000000 0.00000000 0.00000000 0.00000000 1.00000000
## 
##            season
## Y              autumn    spring    summer    winter
##                                                    
##   cancelled 0.0000000 1.0000000 0.0000000 0.0000000
##   late      0.0000000 0.0000000 0.0000000 1.0000000
##   on time   0.1428571 0.2857143 0.4285714 0.1428571
##   very late 0.3333333 0.0000000 0.0000000 0.6666667
## 
##            wind
## Y                high      none    normal
##                                          
##   cancelled 1.0000000 0.0000000 0.0000000
##   late      0.5000000 0.0000000 0.5000000
##   on time   0.2857143 0.3571429 0.3571429
##   very late 0.3333333 0.0000000 0.6666667
## 
##            rain
## Y                heavy       none     slight
##                                             
##   cancelled 1.00000000 0.00000000 0.00000000
##   late      0.50000000 0.50000000 0.00000000
##   on time   0.07142857 0.35714286 0.57142857
##   very late 0.66666667 0.33333333 0.00000000
# Classify the held-out record with the fitted model.
results <- predict(model, newdata = testdata)
# Show the predicted class.
results
## [1] very late
## Levels:  cancelled late on time very late