Loading the data:
telecom <- read.csv("F:\\Statistics\\Cases\\Telecom\\Telecom.csv")
head(telecom)
## Gender TT_gt_100 Response
## 1 F Y N
## 2 M N N
## 3 M N N
## 4 F Y Y
## 5 F N N
## 6 F N N
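Depending on the R version, read.csv() may leave these columns as character vectors (stringsAsFactors defaults to FALSE from R 4.0 onward). An optional sketch of coercing them to factors before modelling, assuming the column names shown above:
# Ensure predictors and response are factors for naiveBayes()
telecom$Gender    <- factor(telecom$Gender)
telecom$TT_gt_100 <- factor(telecom$TT_gt_100)
telecom$Response  <- factor(telecom$Response)
str(telecom)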
Partitioning the Data Using Package caret
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
set.seed(333)
intrain <- createDataPartition(y=telecom$Response,
p=0.7,list = FALSE)
training <- telecom[intrain, ]
validation <- telecom[-intrain,]
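createDataPartition() samples within each level of Response, so the 70/30 split should roughly preserve the class proportions. A quick sanity check (not part of the original script):
# Compare the response distribution in the two subsets
prop.table(table(training$Response))
prop.table(table(validation$Response))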
Model Building on Training Set
library(e1071)
classifier <- naiveBayes(training[,1:2], training[,3])
classifier
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = training[, 1:2], y = training[, 3])
##
## A-priori probabilities:
## training[, 3]
## N Y
## 0.4811321 0.5188679
##
## Conditional probabilities:
## Gender
## training[, 3] F M
## N 0.1764706 0.8235294
## Y 0.7818182 0.2181818
##
## TT_gt_100
## training[, 3] N Y
## N 0.7647059 0.2352941
## Y 0.1636364 0.8363636
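These tables are all the classifier needs: for a new customer, the posterior for each class is proportional to the a-priori probability times the conditional probabilities of the observed predictor values. As an illustrative hand check using the values printed above, the posterior for a female customer with TT_gt_100 = Y is:
# Posterior for Gender = F, TT_gt_100 = Y, from the tables above
num_Y <- 0.5188679 * 0.7818182 * 0.8363636
num_N <- 0.4811321 * 0.1764706 * 0.2352941
num_Y / (num_Y + num_N)   # about 0.944, matching the "raw" predictions below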
Predicting the Most Probable Class on the Validation Set
PredY <- predict(classifier, newdata=validation[,-3],
type="class")
PredY
## [1] Y N Y Y Y Y Y N Y Y Y Y N N Y Y N Y Y Y Y N Y Y N Y Y Y N N Y Y Y N Y
## [36] Y N Y N Y N N Y Y
## Levels: N Y
Predicting Probabilities on the Validation Set
PredYProb <- predict(classifier, newdata=validation[,-3],
type="raw")
PredYProb
## N Y
## [1,] 0.05560861 0.94439139
## [2,] 0.94238373 0.05761627
## [3,] 0.49613402 0.50386598
## [4,] 0.05560861 0.94439139
## [5,] 0.05560861 0.94439139
## [6,] 0.05560861 0.94439139
## [7,] 0.05560861 0.94439139
## [8,] 0.94238373 0.05761627
## [9,] 0.05560861 0.94439139
## [10,] 0.05560861 0.94439139
## [11,] 0.05560861 0.94439139
## [12,] 0.05560861 0.94439139
## [13,] 0.94238373 0.05761627
## [14,] 0.94238373 0.05761627
## [15,] 0.05560861 0.94439139
## [16,] 0.49613402 0.50386598
## [17,] 0.94238373 0.05761627
## [18,] 0.05560861 0.94439139
## [19,] 0.05560861 0.94439139
## [20,] 0.05560861 0.94439139
## [21,] 0.05560861 0.94439139
## [22,] 0.94238373 0.05761627
## [23,] 0.05560861 0.94439139
## [24,] 0.49613402 0.50386598
## [25,] 0.94238373 0.05761627
## [26,] 0.49446750 0.50553250
## [27,] 0.05560861 0.94439139
## [28,] 0.49446750 0.50553250
## [29,] 0.94238373 0.05761627
## [30,] 0.94238373 0.05761627
## [31,] 0.05560861 0.94439139
## [32,] 0.05560861 0.94439139
## [33,] 0.05560861 0.94439139
## [34,] 0.94238373 0.05761627
## [35,] 0.05560861 0.94439139
## [36,] 0.05560861 0.94439139
## [37,] 0.94238373 0.05761627
## [38,] 0.05560861 0.94439139
## [39,] 0.94238373 0.05761627
## [40,] 0.05560861 0.94439139
## [41,] 0.94238373 0.05761627
## [42,] 0.94238373 0.05761627
## [43,] 0.49613402 0.50386598
## [44,] 0.49613402 0.50386598
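The type="class" predictions above correspond to cutting these raw probabilities at 0.5 on the Y column; a different cut-off can be applied directly to PredYProb. An illustrative sketch (PredY_custom is not part of the original script):
# Re-derive class predictions from the raw probabilities at a 0.5 cut-off
PredY_custom <- factor(ifelse(PredYProb[, "Y"] >= 0.5, "Y", "N"),
                       levels = c("N", "Y"))
all(PredY_custom == PredY)   # TRUE: same as the default class predictions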
Evaluating the Model
tbl <- table(PredY, validation[,3],
dnn=list('predicted','actual'))
confusionMatrix(tbl,positive = "Y")
## Confusion Matrix and Statistics
##
## actual
## predicted N Y
## N 13 1
## Y 8 22
##
## Accuracy : 0.7955
## 95% CI : (0.647, 0.902)
## No Information Rate : 0.5227
## P-Value [Acc > NIR] : 0.0001702
##
## Kappa : 0.584
## Mcnemar's Test P-Value : 0.0455003
##
## Sensitivity : 0.9565
## Specificity : 0.6190
## Pos Pred Value : 0.7333
## Neg Pred Value : 0.9286
## Prevalence : 0.5227
## Detection Rate : 0.5000
## Detection Prevalence : 0.6818
## Balanced Accuracy : 0.7878
##
## 'Positive' Class : Y
##
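The headline figures can be verified directly from the 2x2 table (an arithmetic check only):
(13 + 22) / 44   # Accuracy    = 0.7955
22 / (22 + 1)    # Sensitivity = 0.9565 (share of actual Y predicted as Y)
13 / (13 + 8)    # Specificity = 0.6190 (share of actual N predicted as N)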
ROC Curve
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
plot.roc(validation[,3], PredYProb[,2], legacy.axes=TRUE, print.auc=TRUE)
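The same pROC functions can report the AUC numerically and suggest a probability cut-off; a short sketch (roc_obj is an illustrative name, not part of the original script):
# Build the ROC object, extract the AUC, and find the threshold
# maximising sensitivity + specificity (Youden's J)
roc_obj <- roc(validation[,3], PredYProb[,2])
auc(roc_obj)
coords(roc_obj, "best", ret = c("threshold", "sensitivity", "specificity"))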