Loading Dataset

# Load the passenger CSV into a data frame and preview its first six rows.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this script on another machine.
data <- read.csv(
  file = 'C:/Users/Mehedi Hassan Galib/Desktop/Python/datas/tt.csv'
)
head(data, n = 6L)
##   PassengerId Survived Pclass
## 1           1        0      3
## 2           2        1      1
## 3           3        1      3
## 4           4        1      1
## 5           5        0      3
## 6           6        0      3
##                                                  Name    Sex Age SibSp Parch
## 1                             Braund, Mr. Owen Harris   male  22     1     0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38     1     0
## 3                              Heikkinen, Miss. Laina female  26     0     0
## 4        Futrelle, Mrs. Jacques Heath (Lily May Peel) female  35     1     0
## 5                            Allen, Mr. William Henry   male  35     0     0
## 6                                    Moran, Mr. James   male  NA     0     0
##             Ticket    Fare Cabin Embarked
## 1        A/5 21171  7.2500              S
## 2         PC 17599 71.2833   C85        C
## 3 STON/O2. 3101282  7.9250              S
## 4           113803 53.1000  C123        S
## 5           373450  8.0500              S
## 6           330877  8.4583              Q




Remove the unnecessary columns

# Keep every column except the identifier / free-text fields that are not
# used as predictors further down.
data <- data[setdiff(
  names(data),
  c("PassengerId", "Name", "SibSp", "Parch", "Ticket", "Cabin")
)]




Checking NA values

# Number of NA entries in each column, returned as a named numeric vector.
vapply(data, function(column) sum(is.na(column)), numeric(1))
## Survived   Pclass      Sex      Age     Fare Embarked 
##        0        0        0      177        0        0




Filling in missing values with the column mean

# Replace missing values with the mean of their column.
#
# Only numeric/logical columns that actually contain NAs are modified.
# mean() returns NA with a warning ("argument is not numeric or logical")
# for any other column type, so those columns are skipped instead of being
# "imputed" with NA. Iterating over names(data) rather than 1:ncol(data)
# also makes the loop a no-op on a data frame with zero columns.
for (col_name in names(data)) {
  col_values <- data[[col_name]]
  is_missing <- is.na(col_values)
  can_average <- is.numeric(col_values) || is.logical(col_values)
  if (can_average && any(is_missing)) {
    data[[col_name]][is_missing] <- mean(col_values, na.rm = TRUE)
  }
}




Categorical to Factor

# Add factor-typed copies of the categorical columns under lower-case names;
# the original capitalised columns are left in place by this step.
data[["sex"]] <- factor(data[["Sex"]])
data[["embarked"]] <- factor(data[["Embarked"]])
data[["survived"]] <- factor(data[["Survived"]])




Remove the previous categorical columns

# Discard the capitalised originals now that the lower-case factor copies
# exist (column names are case-sensitive, so sex/embarked/survived are kept).
data <- data[!(names(data) %in% c("Sex", "Embarked", "Survived"))]

# Show the resulting column types.
str(data)
## 'data.frame':    891 obs. of  6 variables:
##  $ Pclass  : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Age     : num  22 38 26 35 35 ...
##  $ Fare    : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ sex     : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
##  $ embarked: Factor w/ 4 levels "","C","Q","S": 4 2 4 4 4 3 4 4 4 2 ...
##  $ survived: Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 1 2 2 ...




Train/Test split

# Fix the RNG seed so the random partition below is reproducible.
set.seed(1234)

# Draw one label per row: 1 with probability 0.8, 2 with probability 0.2.
p_data <- sample(1:2, size = nrow(data), replace = TRUE, prob = c(0.8, 0.2))

# Rows labelled 1 form the training set, rows labelled 2 the test set.
train <- data[which(p_data == 1L), ]
test <- data[which(p_data == 2L), ]




Loading the “party” package

library(party)
## Warning: package 'party' was built under R version 4.0.2
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 4.0.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.0.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 4.0.2




Decision Tree

# Fit a conditional inference tree predicting `survived` from the five
# remaining predictors, using the training rows only, then draw it.
# NOTE(review): mincriterion = 0.999 is presumably a deliberately strict
# split threshold -- confirm against the ctree_control documentation.
tree <- ctree(
  formula = survived ~ Pclass + Age + sex + Fare + embarked,
  data = train,
  controls = ctree_control(mincriterion = 0.999)
)
plot(tree)




Model Evaluation (Train)


Confusion Matrix

# Cross-tabulate the tree's predictions on its own training data (rows)
# against the observed training labels (columns).
error_table <- table(predict(tree), train[["survived"]])
print(error_table)
##    
##       0   1
##   0 416 104
##   1  21 163


Misclassification Error

# Share of training rows off the diagonal of the confusion matrix,
# i.e. one minus the proportion classified correctly.
1 - (sum(diag(error_table)) / sum(error_table))
## [1] 0.1775568




Model Evaluation (Test)


Confusion Matrix

# Predict the held-out rows, then cross-tabulate the predictions (rows)
# against the observed test labels (columns).
test_ev <- predict(tree, newdata = test)
error_table_test <- table(test_ev, test[["survived"]])
print(error_table_test)
##        
## test_ev   0   1
##       0 107  38
##       1   5  37


Misclassification Error

# Share of test rows off the diagonal of the confusion matrix,
# i.e. one minus the proportion classified correctly.
1 - (sum(diag(error_table_test)) / sum(error_table_test))
## [1] 0.2299465