# Load the train-arrival data and split it into a training set and one query
# row for the naive Bayes walk-through below.
#
# NOTE(review): machine-specific absolute path. It is kept because the relative
# file name below (and possibly later code) is resolved against it; adjust it
# when running on another machine.
setwd("C:/Users/nicke/OneDrive/Desktop/School/Year 4/Winter/Data Science")
library(e1071)

# Request factors explicitly. Since R 4.0.0 read.table() leaves strings as
# character vectors by default; asking for factors keeps the training rows and
# the query row on one shared set of levels whatever the R version's default.
# NOTE(review): predict() on a naiveBayes model presumably matches new rows by
# factor code, so shared levels matter - confirm against the e1071 docs.
arrival <- read.table(
  "arrival.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Indexing past the last row would silently append all-NA rows to the training
# set, so fail fast if the file is shorter than expected.
stopifnot(nrow(arrival) >= 18)

# Subsetting a data frame by rows already yields a data frame, so no
# as.data.frame() wrapper is needed.
traindata <- arrival[1:18, ]
# The query row is row 4, which is also part of the training rows above.
testdata <- arrival[4, ]
# Show the 18 training rows; the ## lines record the printed output.
# NOTE(review): day is spelled both "saturday" (rows 11, 13) and "Saturday"
# (rows 5, 16), so the two spellings show up as separate categories in the
# day tables further down.
traindata
## day season wind rain class
## 1 weekday spring none none on time
## 2 weekday winter none slight on time
## 3 weekday winter none slight on time
## 4 weekday winter high heavy late
## 5 Saturday summer normal none on time
## 6 weekday autumn normal none very late
## 7 holiday summer high slight on time
## 8 Sunday summer normal none on time
## 9 weekday winter high heavy very late
## 10 weekday summer none slight on time
## 11 saturday spring high heavy cancelled
## 12 weekday summer high slight on time
## 13 saturday winter normal none late
## 14 weekday summer high none on time
## 15 weekday winter normal heavy very late
## 16 Saturday autumn high slight on time
## 17 weekday autumn none heavy on time
## 18 holiday spring normal slight on time
# Show the single query row that will be classified (row 4 of the data, which
# is also one of the training rows printed above).
testdata
## day season wind rain class
## 4 weekday winter high heavy late
# Class priors: first the raw number of training rows per class ...
tprior <- table(traindata[["class"]])
tprior
##
## cancelled late on time very late
## 1 2 12 3
# ... then the same counts as proportions of all training rows.
tprior <- prop.table(tprior)
tprior
##
## cancelled late on time very late
## 0.05555556 0.11111111 0.66666667 0.16666667
# Cross-tabulate class (rows) against day (columns); raw counts are shown first.
dayCounts <- table(class = traindata$class, day = traindata$day)
dayCounts
## day
## class holiday saturday Saturday Sunday weekday
## cancelled 0 1 0 0 0
## late 0 1 0 0 1
## on time 2 0 2 1 7
## very late 0 0 0 0 3
# Turn every class row into proportions, i.e. the share of each day within
# that class.
dayCounts <- prop.table(dayCounts, margin = 1)
dayCounts
## day
## class holiday saturday Saturday Sunday weekday
## cancelled 0.00000000 1.00000000 0.00000000 0.00000000 0.00000000
## late 0.00000000 0.50000000 0.00000000 0.00000000 0.50000000
## on time 0.16666667 0.00000000 0.16666667 0.08333333 0.58333333
## very late 0.00000000 0.00000000 0.00000000 0.00000000 1.00000000
# Share of each season within every class: count class x season, then
# normalise each class row to sum to one.
seasonCounts <- prop.table(
  table(class = traindata$class, season = traindata$season),
  margin = 1
)
seasonCounts
## season
## class autumn spring summer winter
## cancelled 0.0000000 1.0000000 0.0000000 0.0000000
## late 0.0000000 0.0000000 0.0000000 1.0000000
## on time 0.1666667 0.1666667 0.5000000 0.1666667
## very late 0.3333333 0.0000000 0.0000000 0.6666667
# Share of each wind level within every class: count class x wind, then
# normalise each class row to sum to one.
windCounts <- prop.table(
  table(class = traindata$class, wind = traindata$wind),
  margin = 1
)
windCounts
## wind
## class high none normal
## cancelled 1.0000000 0.0000000 0.0000000
## late 0.5000000 0.0000000 0.5000000
## on time 0.3333333 0.4166667 0.2500000
## very late 0.3333333 0.0000000 0.6666667
# Share of each rain level within every class: count class x rain, then
# normalise each class row to sum to one.
rainCounts <- prop.table(
  table(class = traindata$class, rain = traindata$rain),
  margin = 1
)
rainCounts
## rain
## class heavy none slight
## cancelled 1.00000000 0.00000000 0.00000000
## late 0.50000000 0.50000000 0.00000000
## on time 0.08333333 0.33333333 0.58333333
## very late 0.66666667 0.33333333 0.00000000
# Manual naive Bayes for the query row: for every class, multiply the class
# prior by the within-class proportion of each observed feature value.
#
# nb_score() replaces four copy-pasted products. Observed values are converted
# with as.character() before indexing: when a column is a factor, `[` treats
# it as integer codes rather than labels, which only picks the right table
# column while the factor levels line up exactly with the table's columns.
#
# cls: class label, a row name of the probability tables (e.g. "on time").
# obs: one-row data frame with columns day, season, wind and rain.
# Returns the unnormalised score for cls, carrying cls as its name.
nb_score <- function(cls, obs) {
  dayCounts[cls, as.character(obs[["day"]])] *
    seasonCounts[cls, as.character(obs[["season"]])] *
    windCounts[cls, as.character(obs[["wind"]])] *
    rainCounts[cls, as.character(obs[["rain"]])] *
    tprior[cls]
}

prob_ontime <- nb_score("on time", testdata)
prob_late <- nb_score("late", testdata)
prob_verylate <- nb_score("very late", testdata)
prob_cancelled <- nb_score("cancelled", testdata)
prob_ontime
## on time
## 0.001800412
prob_late
## late
## 0.01388889
prob_verylate
## very late
## 0.02469136
prob_cancelled
## cancelled
## 0
# Collect the scores so that both the winning value and the winning class
# label can be reported; max() alone does not say which class won.
scores <- c(prob_ontime, prob_late, prob_verylate, prob_cancelled)
max(scores)
## [1] 0.02469136
names(which.max(scores))
## [1] "very late"
# Fit the same classifier with e1071, naming the four predictors explicitly.
# The printed tables can be compared with the hand-built ones above.
model <- naiveBayes(
  class ~ day + season + wind + rain,
  data = traindata
)
model
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## cancelled late on time very late
## 0.05555556 0.11111111 0.66666667 0.16666667
##
## Conditional probabilities:
## day
## Y holiday saturday Saturday Sunday weekday
## cancelled 0.00000000 1.00000000 0.00000000 0.00000000 0.00000000
## late 0.00000000 0.50000000 0.00000000 0.00000000 0.50000000
## on time 0.16666667 0.00000000 0.16666667 0.08333333 0.58333333
## very late 0.00000000 0.00000000 0.00000000 0.00000000 1.00000000
##
## season
## Y autumn spring summer winter
## cancelled 0.0000000 1.0000000 0.0000000 0.0000000
## late 0.0000000 0.0000000 0.0000000 1.0000000
## on time 0.1666667 0.1666667 0.5000000 0.1666667
## very late 0.3333333 0.0000000 0.0000000 0.6666667
##
## wind
## Y high none normal
## cancelled 1.0000000 0.0000000 0.0000000
## late 0.5000000 0.0000000 0.5000000
## on time 0.3333333 0.4166667 0.2500000
## very late 0.3333333 0.0000000 0.6666667
##
## rain
## Y heavy none slight
## cancelled 1.00000000 0.00000000 0.00000000
## late 0.50000000 0.50000000 0.00000000
## on time 0.08333333 0.33333333 0.58333333
## very late 0.66666667 0.33333333 0.00000000
# Classify the query row with the fitted model.
results <- predict(model, newdata = testdata)
results
## [1] very late
## Levels: cancelled late on time very late
# Refit using every remaining column as a predictor and a small Laplace
# smoothing term, so the zero cells of the previous tables become small
# positive values (see the printed tables below).
model1 <- naiveBayes(
  class ~ .,
  data = traindata,
  laplace = .01
)
model1
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## cancelled late on time very late
## 0.05555556 0.11111111 0.66666667 0.16666667
##
## Conditional probabilities:
## day
## Y holiday saturday Saturday Sunday weekday
## cancelled 0.0095238095 0.9619047619 0.0095238095 0.0095238095 0.0095238095
## late 0.0048780488 0.4926829268 0.0048780488 0.0048780488 0.4926829268
## on time 0.1668049793 0.0008298755 0.1668049793 0.0838174274 0.5817427386
## very late 0.0032786885 0.0032786885 0.0032786885 0.0032786885 0.9868852459
##
## season
## Y autumn spring summer winter
## cancelled 0.009615385 0.971153846 0.009615385 0.009615385
## late 0.004901961 0.004901961 0.004901961 0.985294118
## on time 0.166943522 0.166943522 0.499169435 0.166943522
## very late 0.332236842 0.003289474 0.003289474 0.661184211
##
## wind
## Y high none normal
## cancelled 0.980582524 0.009708738 0.009708738
## late 0.497536946 0.004926108 0.497536946
## on time 0.333333333 0.416458853 0.250207814
## very late 0.333333333 0.003300330 0.663366337
##
## rain
## Y heavy none slight
## cancelled 0.980582524 0.009708738 0.009708738
## late 0.497536946 0.497536946 0.004926108
## on time 0.083956775 0.333333333 0.582709892
## very late 0.663366337 0.333333333 0.003300330
# Classify the query row with the smoothed model.
results1 <- predict(model1, newdata = testdata)
results1
## [1] very late
## Levels: cancelled late on time very late