Load the required libraries
library(readr)
## Warning: package 'readr' was built under R version 3.4.4
library(rpart)
## Warning: package 'rpart' was built under R version 3.4.4
library(caTools)
## Warning: package 'caTools' was built under R version 3.4.4
# Seed the random number generator so that any randomised step below
# produces the same result on every run.
set.seed(seed = 123)
Load the Titanic training dataset
# Read the Titanic training CSV into `dataset`.
# NOTE(review): the path is absolute and specific to one Windows machine;
# adjust it before running this script anywhere else.
dataset <- read_csv(
  file = "c:\\Users\\Wilson\\Downloads\\KEC-Titanic-train.csv"
)
## Parsed with column specification:
## cols(
## PassengerId = col_integer(),
## Survived = col_integer(),
## Pclass = col_integer(),
## Name = col_character(),
## Sex = col_character(),
## Age = col_double(),
## SibSp = col_integer(),
## Parch = col_integer(),
## Ticket = col_character(),
## Fare = col_double(),
## Cabin = col_character(),
## Embarked = col_character()
## )
# Keep the outcome column (Survived) together with the fields used as
# predictors: Pclass (passenger class), Sex, Age, SibSp (siblings),
# Parch (parents) and Fare.
# Columns are selected by name rather than by position so the selection
# cannot silently pick the wrong fields if the CSV column order changes.
df <- dataset[
  ,
  c("Survived", "Pclass", "Sex", "Age", "SibSp", "Parch", "Fare")
]
# Preview the first rows of the reduced table.
head(df)
## # A tibble: 6 x 7
## Survived Pclass Sex Age SibSp Parch Fare
## <int> <int> <chr> <dbl> <int> <int> <dbl>
## 1 0 3 male 22 1 0 7.2500
## 2 1 1 female 38 1 0 71.2833
## 3 1 3 female 26 0 0 7.9250
## 4 1 1 female 35 1 0 53.1000
## 5 0 3 male 35 0 0 8.0500
## 6 0 3 male NA 0 0 8.4583
Data Preparation
# Convert the 0/1 Survived column into a two-level factor.
df[["Survived"]] <- factor(df[["Survived"]], levels = c(0, 1))
# Feature scaling of Fare (column 7) and Age (column 4) is currently
# disabled; uncomment the two lines below to standardise them.
# df[7] <- scale(df[7])
# df[4] <- scale(df[4])
Classifying
# Fit an rpart tree that models Survived from every other column of df.
classifier <- rpart(Survived ~ ., data = df)
# Draw the fitted tree, then label its nodes with text at 60% of the
# default character size.
plot(classifier)
text(classifier, cex = 0.6)
