This is an introduction to Gaussian Naive Bayes. For this demonstration, we will use the iris dataset. Using Gaussian Naive Bayes, let's predict the class of a new flower observation.
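Gaussian Naive Bayes assumes that the predictors are conditionally independent given the class and that, within each class, every predictor is normally distributed. The (unnormalised) posterior probability of a class is therefore the class prior multiplied by the product of the per-attribute normal densities evaluated at the new observation:

posterior(class) ∝ prior(class) * dnorm(x1, mean1, sd1) * ... * dnorm(x4, mean4, sd4)

where the means and standard deviations are estimated from the training observations of that class. The code below computes exactly this, using dnorm() for the normal density.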
# Classes of the response variable and names of predictors
iris.classes <- c("setosa", "versicolor", "virginica")
iris.attributes <- names(iris[,-5])
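These vectors could also be read directly from the data; a quick check (assuming the built-in iris dataset is loaded):
levels(iris$Species)   # "setosa" "versicolor" "virginica"
names(iris)[1:4]       # the four numeric predictors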
Below is the new observation whose class we are going to predict. (Note: this observation belongs to the setosa cluster.)
# New observation
observation <- data.frame(Sepal.Length = 5.0,
                          Sepal.Width  = 3.2,
                          Petal.Length = 1.5,
                          Petal.Width  = 0.3)
# Means, standard deviations and probability densities
# (named means/sds so that base::mean and base::sd are not masked)
means     <- rep(0, length(iris.attributes))
sds       <- rep(0, length(iris.attributes))
densities <- rep(0, length(iris.attributes))
# Prior probability
prior <- rep(0, length(iris.classes))
# Posterior probability
posterior <- rep(0, length(iris.classes))
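Before looping over all classes and attributes, here is what a single density term looks like, for Sepal.Length under the setosa class (a minimal illustration; the value depends on the setosa subset's mean and standard deviation):
dnorm(observation$Sepal.Length,
      mean(iris$Sepal.Length[iris$Species == "setosa"]),
      sd(iris$Sepal.Length[iris$Species == "setosa"]))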
for (i in 1:length(iris.classes)) {
  class.data <- iris[iris$Species == iris.classes[i], ]        # training rows of this class
  prior[i] <- nrow(class.data) / nrow(iris)                    # prior probability
  for (j in 1:length(iris.attributes)) {
    means[j] <- mean(class.data[[iris.attributes[j]]])         # mean of attribute j for this class
    sds[j]   <- sd(class.data[[iris.attributes[j]]])           # standard deviation of attribute j
    densities[j] <- dnorm(observation[[j]], means[j], sds[j])  # density of the new observation
  }
  posterior[i] <- prior[i] * prod(densities)                   # unnormalised posterior probability
}
Let's add class labels to the posterior probabilities and print them.
names(posterior) <- iris.classes # Add class labels
print(posterior)
predictedvalue <- round(posterior / sum(posterior), digits = 3) # Normalise to obtain class probabilities
print(predictedvalue)
The model correctly predicts the class of the new observation as 'setosa'.
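As a sanity check, the same prediction can be reproduced with the naiveBayes() function from the e1071 package, which fits Gaussian densities to numeric predictors (a sketch, assuming e1071 is installed):
library(e1071)
nb.model <- naiveBayes(Species ~ ., data = iris)   # Gaussian Naive Bayes on the four predictors
predict(nb.model, observation)                     # predicted class
predict(nb.model, observation, type = "raw")       # class probabilities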