# Switch to the author's data folder only when it exists. On any other machine
# the script keeps the current working directory instead of stopping with a
# "cannot change working directory" error; relative paths are then resolved
# against that directory.
data_dir <- "C:/Users/nicke/OneDrive/Desktop/School/Year 4/Winter/Data Science"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}


library(e1071)

# Load the arrival records. read.table() stopped converting strings to factors
# by default in R 4.0, so factors are requested explicitly: every subset of
# `arrival` (including the one-row test set) then carries the full set of
# category levels instead of only the values present in that subset.
arrival <- read.table(
  "arrival.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Training set: the first 18 records. head() returns fewer rows when the file
# is shorter, whereas arrival[1:18, ] would pad the result with all-NA rows.
traindata <- head(arrival, 18)

# Single observation to classify.
# NOTE(review): this is row 4 of the training set, so the model is evaluated
# on a record it has already seen.
testdata <- arrival[4, ]

# NOTE(review): the listing below contains both "Saturday" and "saturday" in
# `day`; they are counted as two separate categories further down. Confirm
# whether the source data should be normalised.
traindata
##         day season   wind   rain     class
## 1   weekday spring   none   none   on time
## 2   weekday winter   none slight   on time
## 3   weekday winter   none slight   on time
## 4   weekday winter   high  heavy      late
## 5  Saturday summer normal   none   on time
## 6   weekday autumn normal   none very late
## 7   holiday summer   high slight   on time
## 8    Sunday summer normal   none   on time
## 9   weekday winter   high  heavy very late
## 10  weekday summer   none slight   on time
## 11 saturday spring   high  heavy cancelled
## 12  weekday summer   high slight   on time
## 13 saturday winter normal   none      late
## 14  weekday summer   high   none   on time
## 15  weekday winter normal  heavy very late
## 16 Saturday autumn   high slight   on time
## 17  weekday autumn   none  heavy   on time
## 18  holiday spring normal slight   on time
testdata
##       day season wind  rain class
## 4 weekday winter high heavy  late
# Class priors, step 1: how often each outcome occurs in the training set.
tprior <- table(traindata[["class"]])
tprior
## 
## cancelled      late   on time very late 
##         1         2        12         3
# Step 2: rescale the counts to proportions that sum to 1.
tprior <- prop.table(tprior)
tprior
## 
##  cancelled       late    on time  very late 
## 0.05555556 0.11111111 0.66666667 0.16666667
# Raw counts of each outcome class (rows) per day type (columns).
dayCounts <- with(traindata, table(class, day))
dayCounts
##            day
## class       holiday saturday Saturday Sunday weekday
##   cancelled       0        1        0      0       0
##   late            0        1        0      0       1
##   on time         2        0        2      1       7
##   very late       0        0        0      0       3
# Divide every class row by its total, giving P(day | class).
dayCounts <- prop.table(dayCounts, margin = 1)
dayCounts
##            day
## class          holiday   saturday   Saturday     Sunday    weekday
##   cancelled 0.00000000 1.00000000 0.00000000 0.00000000 0.00000000
##   late      0.00000000 0.50000000 0.00000000 0.00000000 0.50000000
##   on time   0.16666667 0.00000000 0.16666667 0.08333333 0.58333333
##   very late 0.00000000 0.00000000 0.00000000 0.00000000 1.00000000
# P(season | class): cross-tabulate class by season, then scale every class
# row so that it sums to 1.
seasonCounts <- prop.table(with(traindata, table(class, season)), margin = 1)
seasonCounts
##            season
## class          autumn    spring    summer    winter
##   cancelled 0.0000000 1.0000000 0.0000000 0.0000000
##   late      0.0000000 0.0000000 0.0000000 1.0000000
##   on time   0.1666667 0.1666667 0.5000000 0.1666667
##   very late 0.3333333 0.0000000 0.0000000 0.6666667
# P(wind | class): cross-tabulate class by wind, then scale every class row
# so that it sums to 1.
windCounts <- prop.table(with(traindata, table(class, wind)), margin = 1)
windCounts
##            wind
## class            high      none    normal
##   cancelled 1.0000000 0.0000000 0.0000000
##   late      0.5000000 0.0000000 0.5000000
##   on time   0.3333333 0.4166667 0.2500000
##   very late 0.3333333 0.0000000 0.6666667
# P(rain | class): cross-tabulate class by rain, then scale every class row
# so that it sums to 1.
rainCounts <- prop.table(with(traindata, table(class, rain)), margin = 1)
rainCounts
##            rain
## class            heavy       none     slight
##   cancelled 1.00000000 0.00000000 0.00000000
##   late      0.50000000 0.50000000 0.00000000
##   on time   0.08333333 0.33333333 0.58333333
##   very late 0.66666667 0.33333333 0.00000000
# Unnormalised naive Bayes score of one class for the observation in `obs`:
# the product of the four conditional probabilities P(feature value | class)
# and the class prior.
#
# class_label  Row name used in the probability tables, e.g. "on time".
# obs          Data frame holding the observation, with columns day, season,
#              wind and rain.
#
# Reads the script-level tables dayCounts, seasonCounts, windCounts,
# rainCounts and tprior.
#
# Feature values are passed through as.character() so the tables are always
# indexed by label. A factor used directly as a subscript is treated as its
# integer codes, which need not line up with the table columns.
nb_class_score <- function(class_label, obs) {
  dayCounts[class_label, as.character(obs[["day"]])] *
    seasonCounts[class_label, as.character(obs[["season"]])] *
    windCounts[class_label, as.character(obs[["wind"]])] *
    rainCounts[class_label, as.character(obs[["rain"]])] *
    tprior[class_label]
}

prob_ontime <- nb_class_score("on time", testdata)
prob_late <- nb_class_score("late", testdata)
prob_verylate <- nb_class_score("very late", testdata)
prob_cancelled <- nb_class_score("cancelled", testdata)


# Show the unnormalised score computed for each class.
prob_ontime
##     on time 
## 0.001800412
prob_late
##       late 
## 0.01388889
prob_verylate
##  very late 
## 0.02469136
prob_cancelled
## cancelled 
##         0
# Largest of the four scores (the value only, not the class it belongs to).
max(c(prob_ontime, prob_late, prob_verylate, prob_cancelled))
## [1] 0.02469136
# Fit e1071's naive Bayes classifier on the four predictors (no smoothing).
model <- naiveBayes(class ~ day + season + wind + rain, data = traindata)

model
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##  cancelled       late    on time  very late 
## 0.05555556 0.11111111 0.66666667 0.16666667 
## 
## Conditional probabilities:
##            day
## Y              holiday   saturday   Saturday     Sunday    weekday
##   cancelled 0.00000000 1.00000000 0.00000000 0.00000000 0.00000000
##   late      0.00000000 0.50000000 0.00000000 0.00000000 0.50000000
##   on time   0.16666667 0.00000000 0.16666667 0.08333333 0.58333333
##   very late 0.00000000 0.00000000 0.00000000 0.00000000 1.00000000
## 
##            season
## Y              autumn    spring    summer    winter
##   cancelled 0.0000000 1.0000000 0.0000000 0.0000000
##   late      0.0000000 0.0000000 0.0000000 1.0000000
##   on time   0.1666667 0.1666667 0.5000000 0.1666667
##   very late 0.3333333 0.0000000 0.0000000 0.6666667
## 
##            wind
## Y                high      none    normal
##   cancelled 1.0000000 0.0000000 0.0000000
##   late      0.5000000 0.0000000 0.5000000
##   on time   0.3333333 0.4166667 0.2500000
##   very late 0.3333333 0.0000000 0.6666667
## 
##            rain
## Y                heavy       none     slight
##   cancelled 1.00000000 0.00000000 0.00000000
##   late      0.50000000 0.50000000 0.00000000
##   on time   0.08333333 0.33333333 0.58333333
##   very late 0.66666667 0.33333333 0.00000000
# Classify the test observation with the unsmoothed model.
results <- predict(model, newdata = testdata)
results
## [1] very late
## Levels: cancelled late on time very late
# Refit using every non-class column as a predictor, this time with Laplace
# smoothing of 0.01.
model1 <- naiveBayes(class ~ ., data = traindata, laplace = 0.01)
model1
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##  cancelled       late    on time  very late 
## 0.05555556 0.11111111 0.66666667 0.16666667 
## 
## Conditional probabilities:
##            day
## Y                holiday     saturday     Saturday       Sunday      weekday
##   cancelled 0.0095238095 0.9619047619 0.0095238095 0.0095238095 0.0095238095
##   late      0.0048780488 0.4926829268 0.0048780488 0.0048780488 0.4926829268
##   on time   0.1668049793 0.0008298755 0.1668049793 0.0838174274 0.5817427386
##   very late 0.0032786885 0.0032786885 0.0032786885 0.0032786885 0.9868852459
## 
##            season
## Y                autumn      spring      summer      winter
##   cancelled 0.009615385 0.971153846 0.009615385 0.009615385
##   late      0.004901961 0.004901961 0.004901961 0.985294118
##   on time   0.166943522 0.166943522 0.499169435 0.166943522
##   very late 0.332236842 0.003289474 0.003289474 0.661184211
## 
##            wind
## Y                  high        none      normal
##   cancelled 0.980582524 0.009708738 0.009708738
##   late      0.497536946 0.004926108 0.497536946
##   on time   0.333333333 0.416458853 0.250207814
##   very late 0.333333333 0.003300330 0.663366337
## 
##            rain
## Y                 heavy        none      slight
##   cancelled 0.980582524 0.009708738 0.009708738
##   late      0.497536946 0.497536946 0.004926108
##   on time   0.083956775 0.333333333 0.582709892
##   very late 0.663366337 0.333333333 0.003300330
# Classify the test observation with the smoothed model.
results1 <- predict(model1, newdata = testdata)
results1
## [1] very late
## Levels: cancelled late on time very late