#TO IMPLEMENT NAIVE-BAYES CLASSIFICATION ON THE GIVEN SET OF DATA
#the e1071 library must be installed first in order to implement naive bayes
#install.packages("e1071")
#install.packages("ggplot2")
library(e1071)
## Warning: package 'e1071' was built under R version 3.4.4
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
# Show the current working directory (printed when run interactively).
getwd()
## [1] "F:/share/lab_project"
# Switch to the project folder so the relative file names used below resolve.
# NOTE(review): this absolute path only exists on the original author's
# machine; prefer starting R from the project directory instead of setwd().
setwd("F:\\share\\lab_project")
# Load the raw crash data from the CSV file.
# stringsAsFactors = TRUE is passed explicitly because R >= 4.0.0 no longer
# converts strings to factors by default, while later steps of this script
# treat the categorical columns (e.g. `dead`) as factors.
df <- read.csv("nassCDS.csv", header = TRUE, stringsAsFactors = TRUE)
# print(df)
# Open the data set in a separate viewer pane (note the capital V):
# View(df)
# DATA PREPARATION
# Only dead, airbag, seatbelt and sex are needed for this project; every other
# column of the raw data is dropped. Selecting by name fails loudly if one of
# the columns is missing.
dataframe1 <- df[, c("dead", "airbag", "seatbelt", "sex")]
# dataframe1
# Persist the reduced data set and read it back in as the working copy.
write.csv(dataframe1, file = "dataset1.csv", row.names = FALSE)
# stringsAsFactors = TRUE keeps the four columns as factors on R >= 4.0.0 too,
# where read.csv() would otherwise return plain character vectors and
# levels() on them would be NULL.
df1 <- read.csv("dataset1.csv", header = TRUE, stringsAsFactors = TRUE)
# print(df1)
# VISUALISATION OF DATA
# Plot the number of records per injury-severity level for each accident year;
# this helps in understanding the data.
# Rows of `counts` are severity levels, columns are accident years.
counts <- table(df$injSeverity, df$yearacc)
# One colour per severity level, so colours are never recycled and every
# legend entry maps to exactly one bar colour.
severity_cols <- topo.colors(nrow(counts))
# legend.text describes the severity level behind each colour; beside = TRUE
# draws the levels next to each other instead of stacking them.
barplot(
  counts,
  main = "YEAR OF ACCIDENT VS SEVERITY OF INJURY",
  xlab = "year of accident",
  ylab = "number of records",
  col = severity_cols,
  legend.text = rownames(counts),
  beside = TRUE
)

# Base-graphics box plot of occupant age by survival outcome.
# factor() makes sure the factor method of plot() (a box plot) is used even
# when `dead` was read in as a character column.
p1 <- plot(factor(df$dead), df$ageOFocc)

# ggplot2 version: jittered raw points with a semi-transparent box plot on top.
# Columns are referenced by bare name inside aes() so they are looked up in
# `df` and the axes are labelled with the column names.
# NOTE(review): the first geom_boxplot() is drawn underneath the jitter and the
# second box plot and looks redundant -- confirm before removing it.
p <- ggplot(df, aes(x = dead, y = ageOFocc)) +
  geom_boxplot() +
  geom_jitter() +
  geom_boxplot(outlier.size = 0, alpha = 0.8) +
  ggtitle("Dead Vs Age Of the Person involved in acc")
# A ggplot object is only drawn when it is printed; assigning it is not enough.
print(p)
# DATA MANIPULATION
# List the categories found in each column. factor() is applied first so the
# levels are reported even when read.csv() returned the column as plain
# character (the default since R 4.0.0), where levels() alone yields NULL.
levels(factor(df1$dead))
## [1] "alive" "dead"
levels(factor(df1$airbag))
## [1] "airbag" "none"
levels(factor(df1$seatbelt))
## [1] "belted" "none"
levels(factor(df1$sex))
## [1] "f" "m"
# APPLYING NAIVE BAYES (CREATING A MODEL)
# Fit `dead` against every other column of df1. The response is named by its
# bare column name so the formula is resolved entirely through `data`.
naive_bayes_model <- naiveBayes(dead ~ ., data = df1)
# Print the model summary.
naive_bayes_model
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## alive dead
## 0.4974635 0.5025365
##
## Conditional probabilities:
## airbag
## Y airbag none
## alive 0.6324950 0.3675050
## dead 0.4683112 0.5316888
##
## seatbelt
## Y belted none
## alive 0.7310995 0.2689005
## dead 0.6859962 0.3140038
##
## sex
## Y f m
## alive 0.4735470 0.5264530
## dead 0.4608729 0.5391271
# The printout above includes the a-priori probabilities, i.e. the share of
# each class of `dead` in the data, followed by one conditional probability
# table per predictor.
# PREDICTION
# Classify the same data frame the model was fitted on (df1), so that the
# predictions line up row by row with df1$dead used below.
# NOTE(review): these are the training rows, so the figures below describe how
# well the model fits them, not how it performs on unseen data.
prediction <- predict(naive_bayes_model, df1)
# Confusion matrix: rows are the predicted class, columns the recorded class.
# It is computed once and reused for the proportion table.
confusion <- table(prediction, df1$dead)
confusion
##
## prediction alive dead
## alive 8249 6170
## dead 4793 7005
# Row-wise proportions (margin = 1): for each predicted class, the share of
# records whose recorded outcome was alive or dead.
prop.table(confusion, 1)
##
## prediction alive dead
## alive 0.5720924 0.4279076
## dead 0.4062553 0.5937447