Neural Network

library(e1071)

# Load data.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0 read.table()
# returns character columns by default, while the models and the confusion
# matrix computed further down work on factor class labels.
t <- read.table("./data/titanic.data", stringsAsFactors = TRUE)
colnames(t) <- c("class", "age", "sex", "survive")

# Create train and test sets: a random 70% of the rows are used for training
# and the remaining rows for testing. The seed is fixed so that the split (and
# therefore every result below) is the same on each run.
set.seed(42)
sample_size <- floor(0.7 * nrow(t))
training_index <- sample(seq_len(nrow(t)), size = sample_size)
train <- t[training_index, ]
test <- t[-training_index, ]

# Attach nnet, then fit a network with 4 hidden units that predicts
# `survive` from age, sex and class using the training rows only.
library(nnet)
t.nnet <- nnet(
  formula = survive ~ age + sex + class,
  data = train,
  size = 4
)
## # weights:  29
## initial  value 1053.579479 
## iter  10 value 773.795655
## iter  20 value 742.230361
## iter  30 value 737.236504
## iter  40 value 735.383260
## iter  50 value 734.203507
## iter  60 value 734.176555
## iter  70 value 734.132039
## iter  80 value 733.787729
## iter  90 value 732.442436
## iter 100 value 732.392192
## final  value 732.392192 
## stopped after 100 iterations
# Predict class labels for the held-out rows.
t.pred <- predict(t.nnet, test, type = "class")

# Store actual and predicted labels as factors that share one level set.
# The predicted labels are coerced explicitly because data.frame() no longer
# turns strings into factors on R >= 4.0, and the shared levels keep the
# comparison and the confusion matrix valid even if a class is never
# predicted.
actual <- as.factor(test[, "survive"])
class_levels <- union(levels(actual), as.character(t.pred))
results <- data.frame(
  actual = factor(actual, levels = class_levels),
  predicted = factor(t.pred, levels = class_levels)
)

# 1 where the prediction matches the actual label, 0 otherwise.
results$correct <- as.numeric(results$actual == results$predicted)

# load package caret
library(caret)
## Warning: package 'caret' was built under R version 3.2.3
## Warning: package 'ggplot2' was built under R version 3.2.3
# Cross-tabulate predicted against actual labels and print the summary.
results.matrix <- confusionMatrix(
  data = results$predicted,
  reference = results$actual
)
print(results.matrix)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  no yes
##        no  449 136
##        yes   4  72
##                                          
##                Accuracy : 0.7882         
##                  95% CI : (0.755, 0.8188)
##     No Information Rate : 0.6853         
##     P-Value [Acc > NIR] : 2.392e-09      
##                                          
##                   Kappa : 0.4072         
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9912         
##             Specificity : 0.3462         
##          Pos Pred Value : 0.7675         
##          Neg Pred Value : 0.9474         
##              Prevalence : 0.6853         
##          Detection Rate : 0.6793         
##    Detection Prevalence : 0.8850         
##       Balanced Accuracy : 0.6687         
##                                          
##        'Positive' Class : no             
## 
# Show only the prediction-by-reference count table.
print(results.matrix[["table"]])
##           Reference
## Prediction  no yes
##        no  449 136
##        yes   4  72

Naive Bayes

# Fit a naive Bayes classifier on the training rows and predict class labels
# for the held-out rows.
t.nb <- naiveBayes(survive ~ age + sex + class, data = train)
t.pred <- predict(t.nb, test, type = "class")

# Store actual and predicted labels as factors that share one level set, so
# the comparison and the confusion matrix stay valid even when the outcome
# column was read as character or a class is missing from one of the vectors.
actual <- as.factor(test[, "survive"])
class_levels <- union(levels(actual), as.character(t.pred))
results <- data.frame(
  actual = factor(actual, levels = class_levels),
  predicted = factor(t.pred, levels = class_levels)
)

# 1 where the prediction matches the actual label, 0 otherwise.
results$correct <- as.numeric(results$actual == results$predicted)

# Cross-tabulate predicted against actual labels and print the summary.
results.matrix <- confusionMatrix(results$predicted, results$actual)
results.matrix
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  no yes
##        no  417 110
##        yes  36  98
##                                           
##                Accuracy : 0.7791          
##                  95% CI : (0.7455, 0.8102)
##     No Information Rate : 0.6853          
##     P-Value [Acc > NIR] : 5.384e-08       
##                                           
##                   Kappa : 0.4334          
##  Mcnemar's Test P-Value : 1.527e-09       
##                                           
##             Sensitivity : 0.9205          
##             Specificity : 0.4712          
##          Pos Pred Value : 0.7913          
##          Neg Pred Value : 0.7313          
##              Prevalence : 0.6853          
##          Detection Rate : 0.6309          
##    Detection Prevalence : 0.7973          
##       Balanced Accuracy : 0.6958          
##                                           
##        'Positive' Class : no              
## 
# Show only the prediction-by-reference count table.
print(results.matrix[["table"]])
##           Reference
## Prediction  no yes
##        no  417 110
##        yes  36  98

Decision Tree

## 3 Decision Trees in R
# Load the C50 package, which provides the C5.0 algorithm
library(C50)

# Train the model: columns used as predictors.
predictors <- c("class", "age", "sex")

# Fit through the C5.0() generic rather than calling the C5.0.default method
# by name; a direct method call bypasses S3 dispatch and fails whenever the
# package does not export the method. The outcome is coerced to a factor
# because C5.0 is a classification model.
model <- C5.0(x = train[, predictors], y = as.factor(train$survive))

# Evaluate model performance on the held-out rows.
pred <- predict(model, newdata = test, type = "class")

# Store actual and predicted labels as factors that share one level set, so
# the comparison and the confusion matrix stay valid even when a class is
# missing from one of the vectors.
actual <- as.factor(test[, "survive"])
class_levels <- union(levels(actual), as.character(pred))
results <- data.frame(
  actual = factor(actual, levels = class_levels),
  predicted = factor(pred, levels = class_levels)
)

# 1 where the prediction matches the actual label, 0 otherwise.
results$correct <- as.numeric(results$actual == results$predicted)

# Cross-tabulate predicted against actual labels and print the summary.
results.matrix <- confusionMatrix(results$predicted, results$actual)
results.matrix
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  no yes
##        no  449 142
##        yes   4  66
##                                           
##                Accuracy : 0.7791          
##                  95% CI : (0.7455, 0.8102)
##     No Information Rate : 0.6853          
##     P-Value [Acc > NIR] : 5.384e-08       
##                                           
##                   Kappa : 0.3759          
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9912          
##             Specificity : 0.3173          
##          Pos Pred Value : 0.7597          
##          Neg Pred Value : 0.9429          
##              Prevalence : 0.6853          
##          Detection Rate : 0.6793          
##    Detection Prevalence : 0.8941          
##       Balanced Accuracy : 0.6542          
##                                           
##        'Positive' Class : no              
## 
# Show only the prediction-by-reference count table.
print(results.matrix[["table"]])
##           Reference
## Prediction  no yes
##        no  449 142
##        yes   4  66