# Load the passenger table from a CSV file and preview its first rows.
# NOTE(review): the path is absolute and machine-specific; the script will
# only run where this exact file exists -- confirm before sharing.
data <- read.csv(
  file = "C:/Users/Mehedi Hassan Galib/Desktop/Python/datas/tt.csv"
)
head(data, n = 6L)
## PassengerId Survived Pclass
## 1 1 0 3
## 2 2 1 1
## 3 3 1 3
## 4 4 1 1
## 5 5 0 3
## 6 6 0 3
## Name Sex Age SibSp Parch
## 1 Braund, Mr. Owen Harris male 22 1 0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38 1 0
## 3 Heikkinen, Miss. Laina female 26 0 0
## 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0
## 5 Allen, Mr. William Henry male 35 0 0
## 6 Moran, Mr. James male NA 0 0
## Ticket Fare Cabin Embarked
## 1 A/5 21171 7.2500 S
## 2 PC 17599 71.2833 C85 C
## 3 STON/O2. 3101282 7.9250 S
## 4 113803 53.1000 C123 S
## 5 373450 8.0500 S
## 6 330877 8.4583 Q
# Remove the columns that the rest of the script does not use, keeping the
# remaining columns in their original order.
data <- data[
  !names(data) %in%
    c("PassengerId", "Name", "SibSp", "Parch", "Ticket", "Cabin")
]
# Count the missing values left in each remaining column.
colSums(is.na(data))
## Survived Pclass Sex Age Fare Embarked
## 0 0 0 177 0 0
# Fill missing values in a single column with that column's mean.
#
# Only numeric and logical columns are imputed, because those are the types
# for which mean() is defined without a warning; every other column type
# (character, factor, ...) is returned untouched. Columns without any NA are
# also returned as-is, so integer columns keep their integer storage type.
#
# column: one column (vector) of a data frame.
# Returns the column with NA entries replaced by mean(column, na.rm = TRUE).
impute_column_mean <- function(column) {
  if (!(is.numeric(column) || is.logical(column))) {
    return(column)
  }
  missing <- is.na(column)
  if (any(missing)) {
    column[missing] <- mean(column, na.rm = TRUE)
  }
  column
}

# Apply the imputation to every column; `data[] <-` keeps `data` a data frame
# with its existing column names and row names.
data[] <- lapply(data, impute_column_mean)
# Replace the Sex, Embarked and Survived columns with factor versions named
# sex, embarked and survived, appended after the remaining columns.
# NOTE(review): factor() keeps an empty string as its own level, so blank
# Embarked entries become a separate "" category -- confirm this is intended.
data <- cbind(
  data[!names(data) %in% c("Sex", "Embarked", "Survived")],
  sex = factor(data[["Sex"]]),
  embarked = factor(data[["Embarked"]]),
  survived = factor(data[["Survived"]])
)
# Show the resulting column types.
str(data)
## 'data.frame': 891 obs. of 6 variables:
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Age : num 22 38 26 35 35 ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ sex : Factor w/ 2 levels "female","male": 2 1 1 1 2 2 2 2 1 1 ...
## $ embarked: Factor w/ 4 levels "","C","Q","S": 4 2 4 4 4 3 4 4 4 2 ...
## $ survived: Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 1 2 2 ...
# Fix the RNG seed so the random row assignment below is reproducible.
set.seed(1234)
# Draw one label per row: 1 with probability 0.8, 2 with probability 0.2.
p_data <- sample(
  x = 2,
  size = nrow(data),
  replace = TRUE,
  prob = c(0.8, 0.2)
)
# Rows labelled 1 form the training set, rows labelled 2 the test set.
train <- data[which(p_data == 1), ]
test <- data[which(p_data == 2), ]
library(party)
## Warning: package 'party' was built under R version 4.0.2
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 4.0.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.0.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 4.0.2
# Fit a tree with party::ctree on the training rows, predicting `survived`
# from the five remaining columns.
# NOTE(review): mincriterion = 0.999 is forwarded to ctree_control(); per the
# party documentation it is the threshold a split must exceed -- confirm the
# value is deliberate.
tree <- ctree(
  formula = survived ~ Pclass + Age + sex + Fare + embarked,
  data = train,
  controls = ctree_control(mincriterion = 0.999)
)
plot(tree)

# Cross-tabulate the tree's predictions for the training rows (table rows)
# against the observed outcome (table columns).
error_table <- table(predict(tree), train[["survived"]])
print(error_table)
##
## 0 1
## 0 416 104
## 1 21 163
# Training error rate: one minus the share of counts that lie on the
# diagonal of the confusion table (prediction equals observed class).
1 - sum(diag(error_table)) / sum(error_table)
## [1] 0.1775568
# Predict the outcome for the held-out rows and cross-tabulate the
# predictions (table rows) against the observed outcome (table columns).
test_ev <- predict(tree, newdata = test)
error_table_test <- table(test_ev, test[["survived"]])
print(error_table_test)
##
## test_ev 0 1
## 0 107 38
## 1 5 37
# Test error rate: one minus the share of counts that lie on the diagonal
# of the held-out confusion table (prediction equals observed class).
1 - sum(diag(error_table_test)) / sum(error_table_test)
## [1] 0.2299465