library(naivebayes)
## naivebayes 0.9.7 loaded
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the heart data set from the working directory; the first line of the
# file supplies the column names.
data <- read.csv(file = "heart.csv", header = TRUE)
# Print each column's type together with its leading values.
str(object = data)
## 'data.frame':    303 obs. of  14 variables:
##  $ age     : int  63 37 41 56 57 57 56 44 52 57 ...
##  $ sex     : int  1 1 0 1 0 1 0 1 1 1 ...
##  $ cp      : int  3 2 1 1 0 0 1 1 2 2 ...
##  $ trestbps: int  145 130 130 120 120 140 140 120 172 150 ...
##  $ chol    : int  233 250 204 236 354 192 294 263 199 168 ...
##  $ fbs     : int  1 0 0 0 0 0 0 0 1 0 ...
##  $ restecg : int  0 1 0 1 1 1 0 1 1 1 ...
##  $ thalach : int  150 187 172 178 163 148 153 173 162 174 ...
##  $ exang   : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ oldpeak : num  2.3 3.5 1.4 0.8 0.6 0.4 1.3 0 0.5 1.6 ...
##  $ slope   : int  0 0 2 2 2 1 1 2 2 2 ...
##  $ ca      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ thal    : int  1 2 2 2 2 1 2 3 3 2 ...
##  $ target  : int  1 1 1 1 1 1 1 1 1 1 ...
# Add a binary `pain` flag (1 when cp is above 0, otherwise 0) and keep only
# the columns used from here on, in the order listed.
data <- data %>%
  mutate(pain = ifelse(cp > 0, 1, 0)) %>%
  select(age, sex, trestbps, chol, fbs, restecg, exang, target, pain)
# Confirm the reduced column set.
str(data)
## 'data.frame':    303 obs. of  9 variables:
##  $ age     : int  63 37 41 56 57 57 56 44 52 57 ...
##  $ sex     : int  1 1 0 1 0 1 0 1 1 1 ...
##  $ trestbps: int  145 130 130 120 120 140 140 120 172 150 ...
##  $ chol    : int  233 250 204 236 354 192 294 263 199 168 ...
##  $ fbs     : int  1 0 0 0 0 0 0 0 1 0 ...
##  $ restecg : int  0 1 0 1 1 1 0 1 1 1 ...
##  $ exang   : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ target  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ pain    : num  1 1 1 1 0 0 1 1 1 1 ...
# Cross-tabulate the derived `pain` flag (rows) against `target` (columns).
xtabs(formula = ~ pain + target, data = data)
##     target
## pain   0   1
##    0 104  39
##    1  34 126
# Columns that hold category codes rather than measurements; convert each one
# to a factor in a single pass.
categorical_cols <- c("sex", "fbs", "restecg", "exang", "target", "pain")
data[categorical_cols] <- lapply(data[categorical_cols], as.factor)
# Verify the new column types.
str(data)
## 'data.frame':    303 obs. of  9 variables:
##  $ age     : int  63 37 41 56 57 57 56 44 52 57 ...
##  $ sex     : Factor w/ 2 levels "0","1": 2 2 1 2 1 2 1 2 2 2 ...
##  $ trestbps: int  145 130 130 120 120 140 140 120 172 150 ...
##  $ chol    : int  233 250 204 236 354 192 294 263 199 168 ...
##  $ fbs     : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
##  $ restecg : Factor w/ 3 levels "0","1","2": 1 2 1 2 2 2 1 2 2 2 ...
##  $ exang   : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
##  $ target  : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
##  $ pain    : Factor w/ 2 levels "0","1": 2 2 2 2 1 1 2 2 2 2 ...
# Scatter plot of trestbps (vertical axis) against chol (horizontal axis).
plot(x = data$chol, y = data$trestbps)

# Fix the RNG state so the random train/test assignment, and every result
# derived from it, can be reproduced from run to run.
set.seed(1234)
# Draw a label for every row: 1 with probability 0.8, 2 with probability 0.2.
ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.8, 0.2))
# Rows labelled 1 form the training set, rows labelled 2 the test set.
train <- data[ind == 1, ]
test <- data[ind == 2, ]
# Fit a naive Bayes classifier for `pain`, using every other column of the
# training rows as a predictor, then print the fitted model.
model <- naive_bayes(pain ~ ., data = train)
print(model)
## 
## ================================== Naive Bayes ================================== 
##  
##  Call: 
## naive_bayes.formula(formula = pain ~ ., data = train)
## 
## --------------------------------------------------------------------------------- 
##  
## Laplace smoothing: 0
## 
## --------------------------------------------------------------------------------- 
##  
##  A priori probabilities: 
## 
##         0         1 
## 0.4736842 0.5263158 
## 
## --------------------------------------------------------------------------------- 
##  
##  Tables: 
## 
## --------------------------------------------------------------------------------- 
##  ::: age (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## age            0         1
##   mean 56.145299 53.976923
##   sd    8.022882  9.192570
## 
## --------------------------------------------------------------------------------- 
##  ::: sex (Bernoulli) 
## --------------------------------------------------------------------------------- 
##    
## sex         0         1
##   0 0.2307692 0.3692308
##   1 0.7692308 0.6307692
## 
## --------------------------------------------------------------------------------- 
##  ::: trestbps (Gaussian) 
## --------------------------------------------------------------------------------- 
##         
## trestbps         0         1
##     mean 130.71795 131.77692
##     sd    16.78045  16.78421
## 
## --------------------------------------------------------------------------------- 
##  ::: chol (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## chol           0         1
##   mean 251.09402 243.41538
##   sd    52.54013  54.13431
## 
## --------------------------------------------------------------------------------- 
##  ::: fbs (Bernoulli) 
## --------------------------------------------------------------------------------- 
##    
## fbs         0         1
##   0 0.8717949 0.8307692
##   1 0.1282051 0.1692308
## 
## ---------------------------------------------------------------------------------
## 
## # ... and 3 more tables
## 
## ---------------------------------------------------------------------------------
# Draw the plots provided by the fitted model's plot method.
plot(x = model)

# Score the held-out rows, requesting one probability column per class.
# The response column is removed from newdata first: the model holds no
# probability table for `pain`, and passing it along makes predict() warn that
# newdata has more features than the model has tables.
p <- predict(
  model,
  test[, names(test) != "pain", drop = FALSE],
  type = "prob"
)
# Show the first rows of the class probabilities next to the test data.
head(cbind(p, test))
##             0         1 age sex trestbps chol fbs restecg exang target pain
## 4  0.16007809 0.8399219  56   1      120  236   0       1     0      1    1
## 11 0.14382359 0.8561764  54   1      140  239   0       1     0      1    0
## 18 0.08373636 0.9162636  66   0      150  226   0       1     0      1    1
## 21 0.16003882 0.8399612  59   1      135  234   0       1     0      1    0
## 22 0.52866861 0.4713314  44   1      130  233   0       1     1      1    1
## 27 0.10802473 0.8919753  59   1      150  212   1       1     0      1    1
# Predict a class label for every test row. `pain` is dropped from newdata
# because the model has no probability table for its own response; leaving it
# in only triggers predict()'s "more features in the newdata" warning.
p1 <- predict(model, test[, names(test) != "pain", drop = FALSE])
# Confusion table: rows are predicted classes, columns are the observed `pain`
# values. The outer parentheses print the table as it is assigned.
(tab1 <- table(p1, test$pain))
##    
## p1   0  1
##   0 17  8
##   1  9 22
# Misclassification rate: one minus the share of counts on the diagonal of the
# confusion table (predictions that agree with the observed class).
1 - (sum(diag(tab1)) / sum(tab1))
## [1] 0.3035714
# Refit the classifier with usekernel = TRUE (per the naivebayes documentation
# this replaces the Gaussian fit of numeric predictors with kernel density
# estimates). TRUE is spelled out because `T` is an ordinary variable that can
# be reassigned.
model <- naive_bayes(pain ~ ., data = train, usekernel = TRUE)
# Predict class labels for the test rows. `pain` is dropped from newdata so
# predict() does not warn about a feature that has no probability table.
p1 <- predict(model, test[, names(test) != "pain", drop = FALSE])
# Confusion table: rows are predicted classes, columns are the observed `pain`
# values. The outer parentheses print the table as it is assigned.
(tab1 <- table(p1, test$pain))
##    
## p1   0  1
##   0 17  7
##   1  9 23
# Misclassification rate of the kernel-based model: one minus the share of
# counts on the diagonal of the confusion table.
1 - (sum(diag(tab1)) / sum(tab1))
## [1] 0.2857143