# Neural Network
library(e1071)
# Load data
# stringsAsFactors = TRUE is passed explicitly: since R 4.0 read.table()
# returns text columns as character by default, whereas the classifiers and
# caret::confusionMatrix() used further down work on factors.
t <- read.table("./data/titanic.data", stringsAsFactors = TRUE)
# The column names are assigned by hand.
colnames(t) <- c("class", "age", "sex", "survive")
# Create train and test sets:
# 70% of the rows (rounded down) are drawn at random for training, the
# remaining rows form the test set.
# Fix the RNG seed so the random split is the same on every run.
set.seed(42)
sample_size <- floor(0.7 * nrow(t))
training_index <- sample(seq_len(nrow(t)), size = sample_size)
train <- t[training_index, ]
test <- t[-training_index, ]
# Attach nnet, which supplies the nnet() model-fitting function.
library(nnet)
# Fit a network with 4 hidden units that models `survive` from
# `age`, `sex` and `class`, using the training rows only.
t.nnet <- nnet(
  formula = survive ~ age + sex + class,
  data = train,
  size = 4
)
## # weights: 29
## initial value 1053.579479
## iter 10 value 773.795655
## iter 20 value 742.230361
## iter 30 value 737.236504
## iter 40 value 735.383260
## iter 50 value 734.203507
## iter 60 value 734.176555
## iter 70 value 734.132039
## iter 80 value 733.787729
## iter 90 value 732.442436
## iter 100 value 732.392192
## final value 732.392192
## stopped after 100 iterations
# Predict a class label for every row of the held-out test set.
t.pred <- predict(t.nnet, test, type = "class")
# Collect actual vs. predicted labels. Both columns are stored as factors
# sharing one level set: the nnet class predictions come back as a character
# vector, and caret::confusionMatrix() (as well as `==` on factors) needs
# factors with identical levels, even when a class is never predicted.
results <- data.frame(actual = as.factor(test[, "survive"]))
results$predicted <- factor(t.pred, levels = levels(results$actual))
# 1 where the prediction matches the actual label, 0 otherwise.
results$correct <- ifelse(results$actual == results$predicted, 1, 0)
# load package caret
library(caret)
## Warning: package 'caret' was built under R version 3.2.3
## Warning: package 'ggplot2' was built under R version 3.2.3
# Cross-tabulate predictions against the reference labels and print the
# confusion matrix together with its summary statistics.
results.matrix <- confusionMatrix(
  data = results$predicted,
  reference = results$actual
)
results.matrix
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 449 136
## yes 4 72
##
## Accuracy : 0.7882
## 95% CI : (0.755, 0.8188)
## No Information Rate : 0.6853
## P-Value [Acc > NIR] : 2.392e-09
##
## Kappa : 0.4072
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9912
## Specificity : 0.3462
## Pos Pred Value : 0.7675
## Neg Pred Value : 0.9474
## Prevalence : 0.6853
## Detection Rate : 0.6793
## Detection Prevalence : 0.8850
## Balanced Accuracy : 0.6687
##
## 'Positive' Class : no
##
# Show only the prediction-by-reference count table of the confusion matrix.
results.matrix[["table"]]
## Reference
## Prediction no yes
## no 449 136
## yes 4 72
# Naive Bayes
# Fit a naive Bayes classifier (naiveBayes() from e1071) on the training
# rows, modelling `survive` from `age`, `sex` and `class`.
t.nb <- naiveBayes(survive ~ age + sex + class, data = train)
# Predict a class label for every row of the held-out test set.
t.pred <- predict(t.nb, test, type = "class")
# Collect actual vs. predicted labels as factors sharing one level set;
# caret::confusionMatrix() and `==` on factors both require matching levels,
# and the actual labels are character when the data was read on R >= 4.0.
results <- data.frame(actual = as.factor(test[, "survive"]))
results$predicted <- factor(t.pred, levels = levels(results$actual))
# 1 where the prediction matches the actual label, 0 otherwise.
results$correct <- ifelse(results$actual == results$predicted, 1, 0)
# Cross-tabulate predictions against the reference labels and print the
# confusion matrix together with its summary statistics.
results.matrix <- confusionMatrix(
  data = results$predicted,
  reference = results$actual
)
results.matrix
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 417 110
## yes 36 98
##
## Accuracy : 0.7791
## 95% CI : (0.7455, 0.8102)
## No Information Rate : 0.6853
## P-Value [Acc > NIR] : 5.384e-08
##
## Kappa : 0.4334
## Mcnemar's Test P-Value : 1.527e-09
##
## Sensitivity : 0.9205
## Specificity : 0.4712
## Pos Pred Value : 0.7913
## Neg Pred Value : 0.7313
## Prevalence : 0.6853
## Detection Rate : 0.6309
## Detection Prevalence : 0.7973
## Balanced Accuracy : 0.6958
##
## 'Positive' Class : no
##
# Show only the prediction-by-reference count table of the confusion matrix.
results.matrix[["table"]]
## Reference
## Prediction no yes
## no 417 110
## yes 36 98
# Decision Tree
## 3 Decision Trees in R
# Load the C50 package, which provides the C5.0 algorithm
# (this only attaches the package; it must already be installed).
library(C50)
# Train the model
predictors <- c("class", "age", "sex")
# fit the model
# Go through the C5.0() generic instead of calling the S3 method
# C5.0.default() directly; with a data frame `x` and a vector `y` the generic
# dispatches to that same method. The outcome is coerced to a factor because
# C5.0 models need a factor outcome.
model <- C5.0(x = train[, predictors], y = as.factor(train$survive))
# evaluate model performance
pred <- predict(model, newdata = test, type = "class")
# Collect actual vs. predicted labels as factors sharing one level set;
# caret::confusionMatrix() and `==` on factors both require matching levels.
results <- data.frame(actual = as.factor(test[, "survive"]))
results$predicted <- factor(pred, levels = levels(results$actual))
# 1 where the prediction matches the actual label, 0 otherwise.
results$correct <- ifelse(results$actual == results$predicted, 1, 0)
# Cross-tabulate predictions against the reference labels and print the
# confusion matrix together with its summary statistics.
results.matrix <- confusionMatrix(
  data = results$predicted,
  reference = results$actual
)
results.matrix
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 449 142
## yes 4 66
##
## Accuracy : 0.7791
## 95% CI : (0.7455, 0.8102)
## No Information Rate : 0.6853
## P-Value [Acc > NIR] : 5.384e-08
##
## Kappa : 0.3759
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9912
## Specificity : 0.3173
## Pos Pred Value : 0.7597
## Neg Pred Value : 0.9429
## Prevalence : 0.6853
## Detection Rate : 0.6793
## Detection Prevalence : 0.8941
## Balanced Accuracy : 0.6542
##
## 'Positive' Class : no
##
# Show only the prediction-by-reference count table of the confusion matrix.
results.matrix[["table"]]
## Reference
## Prediction no yes
## no 449 142
## yes 4 66