匯入檔案

require(e1071)
require(dplyr)
data('Titanic')
data <- as.data.frame(Titanic)

檢視資料

data %>% head(6)
##   Class    Sex   Age Survived Freq
## 1   1st   Male Child       No    0
## 2   2nd   Male Child       No    0
## 3   3rd   Male Child       No   35
## 4  Crew   Male Child       No    0
## 5   1st Female Child       No    0
## 6   2nd Female Child       No    0
data %>% str()
## 'data.frame':    32 obs. of  5 variables:
##  $ Class   : Factor w/ 4 levels "1st","2nd","3rd",..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ Sex     : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 1 1 ...
##  $ Age     : Factor w/ 2 levels "Child","Adult": 1 1 1 1 1 1 1 1 2 2 ...
##  $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq    : num  0 0 35 0 0 0 17 0 118 154 ...

可以看到以下欄位

建立資料

# 資料還原
repeating_sequence <- rep(1:nrow(data), data$Freq)
rawdata <- data[repeating_sequence,]
rawdata$Freq <- NULL

建立模型(Naive Bayes Model)

naiveBayesModel <- naiveBayes(Survived ~ .,data = rawdata)

naiveBayesModel
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##       No      Yes 
## 0.676965 0.323035 
## 
## Conditional probabilities:
##      Class
## Y            1st        2nd        3rd       Crew
##   No  0.08187919 0.11208054 0.35436242 0.45167785
##   Yes 0.28551336 0.16596343 0.25035162 0.29817159
## 
##      Sex
## Y           Male     Female
##   No  0.91543624 0.08456376
##   Yes 0.51617440 0.48382560
## 
##      Age
## Y          Child      Adult
##   No  0.03489933 0.96510067
##   Yes 0.08016878 0.91983122

預測

survivedPred <- predict(naiveBayesModel,rawdata)

table(survivedPred,rawdata$Survived)
##             
## survivedPred   No  Yes
##          No  1364  362
##          Yes  126  349

另解

require(mlr)
## Loading required package: mlr
## Loading required package: ParamHelpers
## 
## Attaching package: 'mlr'
## The following object is masked from 'package:e1071':
## 
##     impute
# 設定資料和分類目標
task <- makeClassifTask(data = rawdata, target = "Survived")
# 設定模型種類
modelType <- makeLearner("classif.naiveBayes")
# 建立模型
naiveBayesModel <- train(task,learner = modelType)
# 檢視模型
naiveBayesModel$learner.model
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##       No      Yes 
## 0.676965 0.323035 
## 
## Conditional probabilities:
##      Class
## Y            1st        2nd        3rd       Crew
##   No  0.08187919 0.11208054 0.35436242 0.45167785
##   Yes 0.28551336 0.16596343 0.25035162 0.29817159
## 
##      Sex
## Y           Male     Female
##   No  0.91543624 0.08456376
##   Yes 0.51617440 0.48382560
## 
##      Age
## Y          Child      Adult
##   No  0.03489933 0.96510067
##   Yes 0.08016878 0.91983122