Loading the data:
telecom <- read.csv("F:\\Statistics\\Cases\\Telecom\\Telecom.csv")
head(telecom)
## Gender TT_gt_100 Response
## 1 F Y N
## 2 M N N
## 3 M N N
## 4 F Y Y
## 5 F N N
## 6 F N N
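Depending on the R version, read.csv() may leave these columns as character vectors (stringsAsFactors defaults to FALSE from R 4.0 onward). An optional sketch of coercing them to factors before modelling, assuming the column names shown above:
# Ensure predictors and response are factors for naiveBayes()
telecom$Gender    <- factor(telecom$Gender)
telecom$TT_gt_100 <- factor(telecom$TT_gt_100)
telecom$Response  <- factor(telecom$Response)
str(telecom)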
Partitioning the Data Using Package caret
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
set.seed(333)
intrain <- createDataPartition(y=telecom$Response,
p=0.7,list = FALSE)
training <- telecom[intrain, ]
validation <- telecom[-intrain,]
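createDataPartition() samples within each level of Response, so the 70/30 split should roughly preserve the class proportions. A quick sanity check (not part of the original script):
# Compare the response distribution in the two subsets
prop.table(table(training$Response))
prop.table(table(validation$Response))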
Model Building on Training Set
library(e1071)
classifier <- naiveBayes(training[,1:2], training[,3])
classifier
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = training[, 1:2], y = training[, 3])
##
## A-priori probabilities:
## training[, 3]
## N Y
## 0.4811321 0.5188679
##
## Conditional probabilities:
## Gender
## training[, 3] F M
## N 0.1764706 0.8235294
## Y 0.7818182 0.2181818
##
## TT_gt_100
## training[, 3] N Y
## N 0.7647059 0.2352941
## Y 0.1636364 0.8363636
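These tables are all the classifier needs: for a new customer, the posterior for each class is proportional to the a-priori probability times the conditional probabilities of the observed predictor values. As an illustrative hand check using the values printed above, the posterior for a female customer with TT_gt_100 = Y is:
# Posterior for Gender = F, TT_gt_100 = Y, from the tables above
num_Y <- 0.5188679 * 0.7818182 * 0.8363636
num_N <- 0.4811321 * 0.1764706 * 0.2352941
num_Y / (num_Y + num_N)   # about 0.944, matching the "raw" predictions below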
Predicting the Most Probable Class on the Validation Set
PredY <- predict(classifier, newdata=validation[,-3],
type="class")
PredY
## [1] Y N Y Y Y Y Y N Y Y Y Y N N Y Y N Y Y Y Y N Y Y N Y Y Y N N Y Y Y N Y
## [36] Y N Y N Y N N Y Y
## Levels: N Y
Predicting Probabilities on the Validation Set
PredYProb <- predict(classifier, newdata=validation[,-3],
type="raw")
PredYProb
## N Y
## [1,] 0.05560861 0.94439139
## [2,] 0.94238373 0.05761627
## [3,] 0.49613402 0.50386598
## [4,] 0.05560861 0.94439139
## [5,] 0.05560861 0.94439139
## [6,] 0.05560861 0.94439139
## [7,] 0.05560861 0.94439139
## [8,] 0.94238373 0.05761627
## [9,] 0.05560861 0.94439139
## [10,] 0.05560861 0.94439139
## [11,] 0.05560861 0.94439139
## [12,] 0.05560861 0.94439139
## [13,] 0.94238373 0.05761627
## [14,] 0.94238373 0.05761627
## [15,] 0.05560861 0.94439139
## [16,] 0.49613402 0.50386598
## [17,] 0.94238373 0.05761627
## [18,] 0.05560861 0.94439139
## [19,] 0.05560861 0.94439139
## [20,] 0.05560861 0.94439139
## [21,] 0.05560861 0.94439139
## [22,] 0.94238373 0.05761627
## [23,] 0.05560861 0.94439139
## [24,] 0.49613402 0.50386598
## [25,] 0.94238373 0.05761627
## [26,] 0.49446750 0.50553250
## [27,] 0.05560861 0.94439139
## [28,] 0.49446750 0.50553250
## [29,] 0.94238373 0.05761627
## [30,] 0.94238373 0.05761627
## [31,] 0.05560861 0.94439139
## [32,] 0.05560861 0.94439139
## [33,] 0.05560861 0.94439139
## [34,] 0.94238373 0.05761627
## [35,] 0.05560861 0.94439139
## [36,] 0.05560861 0.94439139
## [37,] 0.94238373 0.05761627
## [38,] 0.05560861 0.94439139
## [39,] 0.94238373 0.05761627
## [40,] 0.05560861 0.94439139
## [41,] 0.94238373 0.05761627
## [42,] 0.94238373 0.05761627
## [43,] 0.49613402 0.50386598
## [44,] 0.49613402 0.50386598
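The type="class" predictions above correspond to cutting these raw probabilities at 0.5 on the Y column; a different cut-off can be applied directly to PredYProb. An illustrative sketch (PredY_custom is not part of the original script):
# Re-derive class predictions from the raw probabilities at a 0.5 cut-off
PredY_custom <- factor(ifelse(PredYProb[, "Y"] >= 0.5, "Y", "N"),
                       levels = c("N", "Y"))
all(PredY_custom == PredY)   # TRUE: same as the default class predictions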
Evaluating the Model
tbl <- table(PredY, validation[,3],
dnn=list('predicted','actual'))
confusionMatrix(tbl,positive = "Y")
## Confusion Matrix and Statistics
##
## actual
## predicted N Y
## N 13 1
## Y 8 22
##
## Accuracy : 0.7955
## 95% CI : (0.647, 0.902)
## No Information Rate : 0.5227
## P-Value [Acc > NIR] : 0.0001702
##
## Kappa : 0.584
## Mcnemar's Test P-Value : 0.0455003
##
## Sensitivity : 0.9565
## Specificity : 0.6190
## Pos Pred Value : 0.7333
## Neg Pred Value : 0.9286
## Prevalence : 0.5227
## Detection Rate : 0.5000
## Detection Prevalence : 0.6818
## Balanced Accuracy : 0.7878
##
## 'Positive' Class : Y
##
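The headline figures can be verified directly from the 2x2 table (an arithmetic check only):
(13 + 22) / 44   # Accuracy    = 0.7955
22 / (22 + 1)    # Sensitivity = 0.9565 (share of actual Y predicted as Y)
13 / (13 + 8)    # Specificity = 0.6190 (share of actual N predicted as N)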
ROC Curve
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
plot.roc(validation[,3], PredYProb[,2], legacy.axes=TRUE, print.auc=TRUE)
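The same pROC functions can report the AUC numerically and suggest a probability cut-off; a short sketch (roc_obj is an illustrative name, not part of the original script):
# Build the ROC object, extract the AUC, and find the threshold
# maximising sensitivity + specificity (Youden's J)
roc_obj <- roc(validation[,3], PredYProb[,2])
auc(roc_obj)
coords(roc_obj, "best", ret = c("threshold", "sensitivity", "specificity"))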