library(naivebayes)
## naivebayes 0.9.7 loaded
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the heart data set from the working directory (first row holds the
# column names) and show the structure of the resulting data frame.
data <- read.csv(file = "heart.csv", header = TRUE)
str(data)
## 'data.frame': 303 obs. of 14 variables:
## $ age : int 63 37 41 56 57 57 56 44 52 57 ...
## $ sex : int 1 1 0 1 0 1 0 1 1 1 ...
## $ cp : int 3 2 1 1 0 0 1 1 2 2 ...
## $ trestbps: int 145 130 130 120 120 140 140 120 172 150 ...
## $ chol : int 233 250 204 236 354 192 294 263 199 168 ...
## $ fbs : int 1 0 0 0 0 0 0 0 1 0 ...
## $ restecg : int 0 1 0 1 1 1 0 1 1 1 ...
## $ thalach : int 150 187 172 178 163 148 153 173 162 174 ...
## $ exang : int 0 0 0 0 1 0 0 0 0 0 ...
## $ oldpeak : num 2.3 3.5 1.4 0.8 0.6 0.4 1.3 0 0.5 1.6 ...
## $ slope : int 0 0 2 2 2 1 1 2 2 2 ...
## $ ca : int 0 0 0 0 0 0 0 0 0 0 ...
## $ thal : int 1 2 2 2 2 1 2 3 3 2 ...
## $ target : int 1 1 1 1 1 1 1 1 1 1 ...
# Collapse the chest-pain code `cp` into a binary flag `pain` (1 when cp > 0,
# otherwise 0), then keep only the columns used in the rest of the analysis.
data <- data %>%
  mutate(pain = as.numeric(cp > 0)) %>%
  select(age, sex, trestbps, chol, fbs, restecg, exang, target, pain)
str(data)
## 'data.frame': 303 obs. of 9 variables:
## $ age : int 63 37 41 56 57 57 56 44 52 57 ...
## $ sex : int 1 1 0 1 0 1 0 1 1 1 ...
## $ trestbps: int 145 130 130 120 120 140 140 120 172 150 ...
## $ chol : int 233 250 204 236 354 192 294 263 199 168 ...
## $ fbs : int 1 0 0 0 0 0 0 0 1 0 ...
## $ restecg : int 0 1 0 1 1 1 0 1 1 1 ...
## $ exang : int 0 0 0 0 1 0 0 0 0 0 ...
## $ target : int 1 1 1 1 1 1 1 1 1 1 ...
## $ pain : num 1 1 1 1 0 0 1 1 1 1 ...
# Cross-tabulate the binary pain flag against the target column.
xtabs(formula = ~ pain + target, data = data)
## target
## pain 0 1
## 0 104 39
## 1 34 126
# Treat the integer-coded categorical columns as factors so that they are
# modelled as discrete variables rather than as numbers.
data <- data %>%
  mutate(
    sex = as.factor(sex),
    fbs = as.factor(fbs),
    restecg = as.factor(restecg),
    exang = as.factor(exang),
    target = as.factor(target),
    pain = as.factor(pain)
  )
str(data)
## 'data.frame': 303 obs. of 9 variables:
## $ age : int 63 37 41 56 57 57 56 44 52 57 ...
## $ sex : Factor w/ 2 levels "0","1": 2 2 1 2 1 2 1 2 2 2 ...
## $ trestbps: int 145 130 130 120 120 140 140 120 172 150 ...
## $ chol : int 233 250 204 236 354 192 294 263 199 168 ...
## $ fbs : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
## $ restecg : Factor w/ 3 levels "0","1","2": 1 2 1 2 2 2 1 2 2 2 ...
## $ exang : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
## $ target : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ pain : Factor w/ 2 levels "0","1": 2 2 2 2 1 1 2 2 2 2 ...
# Scatter plot of the `chol` column (x axis) against `trestbps` (y axis).
plot(x = data$chol, y = data$trestbps)

# Fix the RNG seed so that the random train/test partition -- and therefore
# every model, table and error rate derived from it -- is the same on each run.
# Any transcript output recorded from an earlier unseeded run will not match
# these numbers exactly.
set.seed(1234)

# Assign every row to group 1 (probability 0.8, training set) or group 2
# (probability 0.2, test set).
ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.8, 0.2))
train <- data[ind == 1, ]
test <- data[ind == 2, ]

# Fit a naive Bayes classifier with `pain` as the response and every other
# column of `train` as a predictor, then print the fitted object.
# NOTE(review): `target` is among the predictors here -- confirm that `pain`
# (and not `target`) is the intended response.
model <- naive_bayes(pain ~ ., data = train)
model
##
## ================================== Naive Bayes ==================================
##
## Call:
## naive_bayes.formula(formula = pain ~ ., data = train)
##
## ---------------------------------------------------------------------------------
##
## Laplace smoothing: 0
##
## ---------------------------------------------------------------------------------
##
## A priori probabilities:
##
## 0 1
## 0.4736842 0.5263158
##
## ---------------------------------------------------------------------------------
##
## Tables:
##
## ---------------------------------------------------------------------------------
## ::: age (Gaussian)
## ---------------------------------------------------------------------------------
##
## age 0 1
## mean 56.145299 53.976923
## sd 8.022882 9.192570
##
## ---------------------------------------------------------------------------------
## ::: sex (Bernoulli)
## ---------------------------------------------------------------------------------
##
## sex 0 1
## 0 0.2307692 0.3692308
## 1 0.7692308 0.6307692
##
## ---------------------------------------------------------------------------------
## ::: trestbps (Gaussian)
## ---------------------------------------------------------------------------------
##
## trestbps 0 1
## mean 130.71795 131.77692
## sd 16.78045 16.78421
##
## ---------------------------------------------------------------------------------
## ::: chol (Gaussian)
## ---------------------------------------------------------------------------------
##
## chol 0 1
## mean 251.09402 243.41538
## sd 52.54013 54.13431
##
## ---------------------------------------------------------------------------------
## ::: fbs (Bernoulli)
## ---------------------------------------------------------------------------------
##
## fbs 0 1
## 0 0.8717949 0.8307692
## 1 0.1282051 0.1692308
##
## ---------------------------------------------------------------------------------
##
## # ... and 3 more tables
##
## ---------------------------------------------------------------------------------
# Visualise the fitted naive Bayes model through its plot method
# (presumably one panel per predictor table -- see ?plot.naive_bayes).
plot(model)








# Posterior class probabilities for the held-out rows.
# The response column `pain` is removed from `newdata`: predict.naive_bayes()
# warns when `newdata` has columns without a probability table in the model,
# and only the predictor columns are used for the calculation anyway.
p <- predict(
  model,
  newdata = test[, names(test) != "pain", drop = FALSE],
  type = "prob"
)

# Show the first rows of the probabilities next to the full test records.
head(cbind(p, test))
## 0 1 age sex trestbps chol fbs restecg exang target pain
## 4 0.16007809 0.8399219 56 1 120 236 0 1 0 1 1
## 11 0.14382359 0.8561764 54 1 140 239 0 1 0 1 0
## 18 0.08373636 0.9162636 66 0 150 226 0 1 0 1 1
## 21 0.16003882 0.8399612 59 1 135 234 0 1 0 1 0
## 22 0.52866861 0.4713314 44 1 130 233 0 1 1 1 1
## 27 0.10802473 0.8919753 59 1 150 212 1 1 0 1 1
# Predicted class labels for the held-out rows. The response column `pain` is
# dropped from `newdata` so predict.naive_bayes() does not warn about columns
# that have no probability table in the model; predictions are unaffected.
p1 <- predict(model, newdata = test[, names(test) != "pain", drop = FALSE])

# Confusion matrix: predicted class (rows) versus observed `pain` (columns).
(tab1 <- table(p1, test$pain))
##
## p1 0 1
## 0 17 8
## 1 9 22

# Misclassification rate: one minus the share of cases on the diagonal.
1 - sum(diag(tab1)) / sum(tab1)
## [1] 0.3035714
# Refit the classifier with `usekernel = TRUE` instead of the default used
# above. `TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
model <- naive_bayes(pain ~ ., data = train, usekernel = TRUE)

# Predicted class labels for the held-out rows. The response column `pain` is
# dropped from `newdata` so predict.naive_bayes() does not warn about columns
# that have no probability table in the model; predictions are unaffected.
p1 <- predict(model, newdata = test[, names(test) != "pain", drop = FALSE])

# Confusion matrix: predicted class (rows) versus observed `pain` (columns).
(tab1 <- table(p1, test$pain))
##
## p1 0 1
## 0 17 7
## 1 9 23

# Misclassification rate: one minus the share of cases on the diagonal.
1 - sum(diag(tab1)) / sum(tab1)
## [1] 0.2857143