# NOTE(review): a hard-coded absolute working directory ties the script to one
# machine. It is kept because the relative data path below (and possibly code
# outside this view) resolves against it.
setwd("C:/datamining")
library(e1071)

# Read the arrival records with categorical columns as factors.
# The results printed further down (the empty class level in the priors, the
# "Levels:" line of the prediction) come from factor columns, but since
# R 4.0.0 read.table() returns character columns unless asked otherwise, so
# the conversion is requested explicitly.
arrival <- read.table(
  "data/arrival.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Rows 1-20 form the training set; row 21 is held out as the record to
# classify. Row subsetting already yields data frames, so no as.data.frame().
traindata <- arrival[1:20, ]
testdata <- arrival[21, , drop = FALSE]

# NOTE(review): the day column mixes "Saturday" and "saturday" (see the
# printout below), which R treats as two distinct categories. TODO: decide
# whether to normalise the case; doing so changes every table printed below.
traindata
## day season wind rain class
## 1 weekday spring none none on time
## 2 weekday winter none slight on time
## 3 weekday winter none slight on time
## 4 weekday winter high heavy late
## 5 Saturday summer normal none on time
## 6 weekday autumn normal none very late
## 7 holiday summer high slight on time
## 8 Sunday summer normal none on time
## 9 weekday winter high heavy very late
## 10 weekday summer none slight on time
## 11 saturday spring high heavy cancelled
## 12 weekday summer high slight on time
## 13 saturday winter normal none late
## 14 weekday summer high none on time
## 15 weekday winter normal heavy very late
## 16 Saturday autumn high slight on time
## 17 weekday autumn none heavy on time
## 18 holiday spring normal slight on time
## 19 weekday spring normal none on time
## 20 weekday spring normal slight on time
# Display the single held-out record (row 21 of arrival) that is classified later
testdata
## day season wind rain class
## 21 weekday winter high heavy
# Tally how many training records fall into each arrival class
tprior <- table(traindata[["class"]])
tprior
##
## cancelled late on time very late
## 0 1 2 14 3
# Turn the class tallies into relative frequencies (they sum to 1)
tprior <- prop.table(tprior)
tprior
##
## cancelled late on time very late
## 0.00 0.05 0.10 0.70 0.15
# Cross-tabulate the training records: arrival class in rows, day in columns
dayCounts <- with(traindata, table(class, day))
dayCounts
## day
## class holiday saturday Saturday Sunday weekday
## 0 0 0 0 0
## cancelled 0 1 0 0 0
## late 0 1 0 0 1
## on time 2 0 2 1 9
## very late 0 0 0 0 3
# Fit a naive Bayes classifier that predicts the arrival class from the four
# categorical attributes; no Laplace smoothing is requested.
model <- naiveBayes(
  class ~ day + season + wind + rain,
  data = traindata
)
model
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## cancelled late on time very late
## 0.00 0.05 0.10 0.70 0.15
##
## Conditional probabilities:
## day
## Y holiday saturday Saturday Sunday weekday
##
## cancelled 0.00000000 1.00000000 0.00000000 0.00000000 0.00000000
## late 0.00000000 0.50000000 0.00000000 0.00000000 0.50000000
## on time 0.14285714 0.00000000 0.14285714 0.07142857 0.64285714
## very late 0.00000000 0.00000000 0.00000000 0.00000000 1.00000000
##
## season
## Y autumn spring summer winter
##
## cancelled 0.0000000 1.0000000 0.0000000 0.0000000
## late 0.0000000 0.0000000 0.0000000 1.0000000
## on time 0.1428571 0.2857143 0.4285714 0.1428571
## very late 0.3333333 0.0000000 0.0000000 0.6666667
##
## wind
## Y high none normal
##
## cancelled 1.0000000 0.0000000 0.0000000
## late 0.5000000 0.0000000 0.5000000
## on time 0.2857143 0.3571429 0.3571429
## very late 0.3333333 0.0000000 0.6666667
##
## rain
## Y heavy none slight
##
## cancelled 1.00000000 0.00000000 0.00000000
## late 0.50000000 0.50000000 0.00000000
## on time 0.07142857 0.35714286 0.57142857
## very late 0.66666667 0.33333333 0.00000000
# Classify the held-out record; type = "class" (the default) returns the
# predicted label rather than the per-class probabilities.
results <- predict(model, newdata = testdata, type = "class")
results
## [1] very late
## Levels: cancelled late on time very late