library(readr)
# Download the raw passenger table; readr guesses the column types.
T3 <- readr::read_csv(file = "https://goo.gl/At238b")
## Rows: 1309 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): pclass, name, sex, ticket, cabin, embarked, boat, home.dest
## dbl (6): survived, age, sibsp, parch, fare, body
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer, so only call it from an interactive session;
# this keeps the script usable when it is run or knitted non-interactively.
if (interactive()) {
  View(T3)
}
A new dataset called titanic is built.
It contains the following features:
Survived: indicates whether each passenger survived (1) or did not survive (0).
Embarked: contains the port from which the passengers boarded the ship.
Sex: contains the gender of the passengers.
Sibsp: contains the number of siblings or spouses each passenger had onboard.
Parch: contains the number of parents or children of each passenger onboard.
Fare: contains the price of the tickets for each passenger.
Age: contains the age of each passenger.
# Columns that are not used as features in this analysis.
T3_drop <- c("pclass", "name", "ticket", "cabin", "boat", "body", "home.dest")
# Keep every column of T3 whose name is not in the drop list.
titanic <- T3[, !(names(T3) %in% T3_drop)]
# View() opens a data viewer, so only call it from an interactive session.
if (interactive()) {
  View(titanic)
}
# Check whether any cell of the reduced table is missing.
any(is.na(titanic))
## [1] TRUE
# Drop every row that has at least one missing value.
titanic_1 <- na.omit(object = titanic)
# Reconfirm that no missing values are left.
anyNA(titanic_1)
## [1] FALSE
summary(titanic_1)
## survived sex age sibsp
## Min. :0.0000 Length:1043 Min. : 0.17 Min. :0.0000
## 1st Qu.:0.0000 Class :character 1st Qu.:21.00 1st Qu.:0.0000
## Median :0.0000 Mode :character Median :28.00 Median :0.0000
## Mean :0.4075 Mean :29.81 Mean :0.5043
## 3rd Qu.:1.0000 3rd Qu.:39.00 3rd Qu.:1.0000
## Max. :1.0000 Max. :80.00 Max. :8.0000
## parch fare embarked
## Min. :0.0000 Min. : 0.00 Length:1043
## 1st Qu.:0.0000 1st Qu.: 8.05 Class :character
## Median :0.0000 Median : 15.75 Mode :character
## Mean :0.4219 Mean : 36.60
## 3rd Qu.:1.0000 3rd Qu.: 35.08
## Max. :6.0000 Max. :512.33
str(titanic_1)
## tibble [1,043 × 7] (S3: tbl_df/tbl/data.frame)
## $ survived: num [1:1043] 1 1 0 0 0 1 1 0 1 0 ...
## $ sex : chr [1:1043] "female" "male" "female" "male" ...
## $ age : num [1:1043] 29 0.92 2 30 25 48 63 39 53 71 ...
## $ sibsp : num [1:1043] 0 1 1 1 1 0 1 0 2 0 ...
## $ parch : num [1:1043] 0 2 2 2 2 0 0 0 0 0 ...
## $ fare : num [1:1043] 211 152 152 152 152 ...
## $ embarked: chr [1:1043] "S" "S" "S" "S" ...
## - attr(*, "na.action")= 'omit' Named int [1:266] 16 38 41 47 60 70 71 75 81 107 ...
## ..- attr(*, "names")= chr [1:266] "16" "38" "41" "47" ...
dim(titanic_1)
## [1] 1043 7
# Class of every column. vapply() guarantees a named character vector with
# exactly one class per column, whereas sapply() may silently return a list.
vapply(titanic_1, class, character(1))
## survived sex age sibsp parch fare
## "numeric" "character" "numeric" "numeric" "numeric" "numeric"
## embarked
## "character"
# Standard deviation of the numeric columns only. sd() cannot be computed for
# the character columns (sex, embarked); including them only produced NA
# results and "NAs introduced by coercion" warnings.
vapply(Filter(is.numeric, titanic_1), sd, numeric(1))
## survived age sibsp parch fare
## 0.4916009 14.3662545 0.9130797 0.8406546 55.7536477
library(mlbench)
library(e1071)
# Collect the numeric columns. The argument expressions give the columns
# their `titanic_1.<name>` labels in the printed result.
skew_titanic <- data.frame(
  titanic_1$survived,
  titanic_1$age,
  titanic_1$sibsp,
  titanic_1$parch,
  titanic_1$fare
)
# Skewness per column. vapply() walks the data-frame columns directly instead
# of letting apply() coerce the data frame to a matrix first, and it checks
# that each column yields exactly one number.
skew <- vapply(skew_titanic, skewness, numeric(1))
print(skew)
## titanic_1.survived titanic_1.age titanic_1.sibsp titanic_1.parch
## 0.3760484 0.4057134 2.7984033 2.6527564
## titanic_1.fare
## 4.1106587
# Plot the per-column skewness values as connected points.
plot(skew, col = "black", type = "b")
# Correlation matrix of the numeric columns (cor() with its default method).
cor_titanic <- data.frame(
  titanic_1$survived,
  titanic_1$age,
  titanic_1$sibsp,
  titanic_1$parch,
  titanic_1$fare
)
cor(x = cor_titanic)
## titanic_1.survived titanic_1.age titanic_1.sibsp
## titanic_1.survived 1.00000000 -0.05741486 -0.01140343
## titanic_1.age -0.05741486 1.00000000 -0.24234489
## titanic_1.sibsp -0.01140343 -0.24234489 1.00000000
## titanic_1.parch 0.11543601 -0.14931063 0.37395967
## titanic_1.fare 0.24785762 0.17720569 0.14213054
## titanic_1.parch titanic_1.fare
## titanic_1.survived 0.1154360 0.2478576
## titanic_1.age -0.1493106 0.1772057
## titanic_1.sibsp 0.3739597 0.1421305
## titanic_1.parch 1.0000000 0.2176495
## titanic_1.fare 0.2176495 1.0000000
table(titanic_1$survived)
##
## 0 1
## 618 425
table(titanic_1$sex)
##
## female male
## 386 657
Embarked: There were 3 different ports (C, Q, S).
212 (20.3%) passengers came in through port C (Cherbourg).
50(4.8%) passengers came in through port Q(Queenstown).
781(74.9%) passengers came in through port S(Southampton).
table(titanic_1$embarked)
##
## C Q S
## 212 50 781
Sibsp:
There were 682 passengers with no siblings or spouses onboard.
280 passengers had 1 sibling or spouse onboard.
36 passengers had 2 siblings or spouses onboard.
16 passengers had 3 siblings or spouses onboard.
22 passengers had 4 siblings or spouses onboard.
6 passengers had 5 siblings or spouses onboard.
1 passenger had 8 siblings or spouses onboard.
table(titanic_1$sibsp)
##
## 0 1 2 3 4 5 8
## 682 280 36 16 22 6 1
Parch:
There were 765 passengers with no parents or children onboard.
160 passengers had 1 parent or child onboard.
97 passengers had 2 parents or children onboard.
8 passengers had 3 parents or children onboard.
5 passengers had 4 parents or children onboard.
6 passengers had 5 parents or children onboard.
2 passengers had 6 parents or children onboard.
table(titanic_1$parch)
##
## 0 1 2 3 4 5 6
## 765 160 97 8 5 6 2
table(titanic_1$fare)
##
## 0 3.1708 4.0125 5 6.2375 6.4375 6.45 6.4958
## 8 1 1 1 1 1 1 3
## 6.75 6.95 6.975 7 7.0458 7.05 7.0542 7.125
## 2 1 2 1 1 7 2 4
## 7.1417 7.225 7.2292 7.25 7.2833 7.4958 7.5208 7.55
## 1 13 13 14 1 3 1 3
## 7.5792 7.6292 7.65 7.725 7.7333 7.7417 7.75 7.775
## 1 1 6 1 4 1 20 23
## 7.7958 7.8 7.8208 7.8292 7.85 7.8542 7.875 7.8792
## 10 1 1 1 1 21 1 4
## 7.8875 7.8958 7.925 8.0292 8.05 8.1583 8.3 8.3625
## 1 30 23 1 38 1 1 1
## 8.4042 8.4333 8.5167 8.6542 8.6625 8.6833 8.85 8.9625
## 1 1 2 1 20 1 1 1
## 9 9.2167 9.225 9.325 9.35 9.475 9.4833 9.5
## 2 1 3 1 3 1 1 11
## 9.5875 9.6875 9.825 9.8375 9.8417 9.8458 10.1708 10.4625
## 2 1 2 1 1 1 1 2
## 10.5 10.5167 11.1333 11.2417 11.5 12 12.1833 12.275
## 35 1 3 2 6 1 2 1
## 12.2875 12.35 12.475 12.525 12.65 12.7375 12.875 13
## 2 4 4 1 1 1 1 58
## 13.4167 13.5 13.775 13.7917 13.8583 13.8625 13.9 14
## 2 7 3 1 3 1 2 1
## 14.1083 14.4 14.4542 14.4583 14.5 15 15.0333 15.0458
## 2 3 7 1 7 1 1 2
## 15.1 15.2458 15.5 15.55 15.7417 15.75 15.85 15.9
## 1 5 2 2 3 2 4 3
## 16 16.1 16.7 17.4 17.8 18 18.75 18.7875
## 2 8 3 2 2 4 3 2
## 19.2583 19.5 20.2125 20.25 20.525 20.575 21 21.075
## 4 2 3 3 3 4 13 5
## 22.025 22.525 23 24 24.15 25.5875 25.7 25.9292
## 3 3 7 2 6 1 1 2
## 26 26.25 26.2833 26.2875 26.3875 26.55 27 27.4458
## 48 6 1 3 1 18 2 1
## 27.7208 27.75 27.9 28.5 28.5375 28.7125 29 29.125
## 8 5 6 2 1 1 3 6
## 29.7 30 30.0708 30.5 30.6958 31 31.275 31.3875
## 4 6 2 5 1 2 7 7
## 31.5 31.6792 32.3208 32.5 33 33.5 34.0208 34.375
## 3 1 1 3 2 1 1 5
## 34.6542 35.5 36.75 37.0042 38.5 39 39.4 39.6
## 1 3 4 3 1 7 2 1
## 39.6875 40.125 41.5792 42.4 42.5 45.5 46.9 47.1
## 7 1 4 1 1 1 8 2
## 49.5 49.5042 50 50.4958 51.4792 51.8625 52 52.5542
## 1 2 1 2 2 2 6 4
## 53.1 55 55.4417 55.9 56.4958 56.9292 57 57.75
## 6 1 4 2 4 2 2 2
## 57.9792 59.4 60 61.175 61.3792 61.9792 63.3583 65
## 2 4 2 2 2 2 2 5
## 66.6 69.3 69.55 71 71.2833 73.5 75.2417 75.25
## 2 2 1 2 2 7 2 2
## 76.2917 76.7292 77.2875 77.9583 78.2667 78.85 79.2 79.65
## 2 3 2 3 2 3 5 3
## 81.8583 82.1708 82.2667 83.1583 83.475 86.5 89.1042 90
## 3 1 2 6 2 3 1 5
## 91.0792 93.5 106.425 108.9 110.8833 113.275 120 133.65
## 2 4 3 3 3 3 4 1
## 134.5 135.6333 136.7792 146.5208 151.55 153.4625 164.8667 211.3375
## 5 4 2 2 6 3 4 4
## 211.5 221.7792 227.525 247.5208 262.375 263 512.3292
## 5 3 4 3 7 6 4
plot(titanic_1$fare, type='l')
summary(titanic_1$fare)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 8.05 15.75 36.60 35.08 512.33
Age:
The oldest passenger onboard was 80 years old.
The youngest passenger onboard was about 2 months old (0.17 years).
The average age for the passengers onboard is approximately 30 years.
table(titanic_1$age)
##
## 0.17 0.33 0.42 0.67 0.75 0.83 0.92 1 2 3 4 5 6 7 8 9
## 1 1 1 1 3 3 2 10 12 7 10 5 6 4 6 10
## 10 11 11.5 12 13 14 14.5 15 16 17 18 18.5 19 20 20.5 21
## 4 4 1 3 5 8 2 6 19 20 39 3 29 23 1 41
## 22 22.5 23 23.5 24 24.5 25 26 26.5 27 28 28.5 29 30 30.5 31
## 43 1 26 1 47 1 34 30 1 30 32 3 30 40 2 23
## 32 32.5 33 34 34.5 35 36 36.5 37 38 38.5 39 40 40.5 41 42
## 24 4 21 16 2 23 31 2 9 13 1 20 18 3 11 18
## 43 44 45 45.5 46 47 48 49 50 51 52 53 54 55 55.5 56
## 9 10 21 2 6 14 14 9 15 8 6 4 10 8 1 4
## 57 58 59 60 61 62 63 64 65 66 67 70 70.5 71 74 76
## 5 6 3 7 5 4 4 5 3 1 1 2 1 2 1 1
## 80
## 1
summary(titanic_1$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.17 21.00 28.00 29.81 39.00 80.00
suppressPackageStartupMessages(library(dplyr))
library(explore)
# Run the explore package's all-variable overview on the cleaned table.
explore_all(titanic_1)
# Inspect the outcome column before it is recoded as a factor.
str(object = titanic_1$survived)
## num [1:1043] 1 1 0 0 0 1 1 0 1 0 ...
# Recode the 0/1 outcome as a two-level factor; the labels default to the
# level names, so "0" and "1" are kept as they are.
titanic_1$survived <- factor(
  x = titanic_1$survived,
  levels = c("0", "1")
)
str(object = titanic_1$survived)
## Factor w/ 2 levels "0","1": 2 2 1 1 1 2 2 1 2 1 ...
str(titanic_1$embarked)
## chr [1:1043] "S" "S" "S" "S" "S" "S" "S" "S" "S" "C" "C" "C" "C" "S" "S" ...
# Recode the port of embarkation as a factor with the fixed level order
# C, Q, S; the labels default to the level names.
titanic_1$embarked <- factor(
  x = titanic_1$embarked,
  levels = c("C", "Q", "S")
)
str(object = titanic_1$embarked)
## Factor w/ 3 levels "C","Q","S": 3 3 3 3 3 3 3 3 3 1 ...
str(titanic_1$sex)
## chr [1:1043] "female" "male" "female" "male" "female" "male" "female" ...
# Recode sex as a factor with the fixed level order female, male; the labels
# default to the level names.
titanic_1$sex <- factor(
  x = titanic_1$sex,
  levels = c("female", "male")
)
str(object = titanic_1$sex)
## Factor w/ 2 levels "female","male": 1 2 1 2 1 2 1 2 1 2 ...
head(titanic_1)
## # A tibble: 6 × 7
## survived sex age sibsp parch fare embarked
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <fct>
## 1 1 female 29 0 0 211. S
## 2 1 male 0.92 1 2 152. S
## 3 0 female 2 1 2 152. S
## 4 0 male 30 1 2 152. S
## 5 0 female 25 1 2 152. S
## 6 1 male 48 0 0 26.6 S
The dependent variable is Survived. Below is its proportion in the dataset.
table(titanic_1$survived)
##
## 0 1
## 618 425
prop.table(table(titanic_1$survived))
##
## 0 1
## 0.5925216 0.4074784
Plot Survival with other features to see if any correlation exists.
Sex:
79.5% of the males did not survive while 20.5% survived.
24.9% of the females did not survive while 75.1% survived.
The female passengers had a higher survival rate on the Titanic than the male passengers.
library(explore)
# Explore `survived` on the cleaned table with `sex` passed as the target.
explore(titanic_1, survived, target = sex)
Embarked:
64.1% of the passengers that came in through S didn’t survive while 35.9% survived.
74% of the passengers that came in through Q didn’t survive while 26% survived.
37.7% of the passengers that came in through C didn’t survive while 62.3% survived.
Port C has the highest survival rate out of the 3 ports.
# Explore `survived` on the cleaned table with `embarked` passed as the target.
explore(titanic_1, survived, target = embarked)
Sibsp:
62.9% of passengers without siblings or spouses onboard didn’t survive while 37.1% survived.
47.5% of passengers with 1 sibling or spouse onboard didn’t survive while 52.5% survived…etc.
# Explore `survived` on the cleaned table with `sibsp` passed as the target.
explore(titanic_1, survived, target = sibsp)
# Share of survivors and non-survivors within each sibsp value.
Prop_Sibsp <- summarise(
  group_by(titanic_1, sibsp),
  Survived = mean(survived == 1),
  DidnotSurvive = mean(survived == 0)
)
Prop_Sibsp
## # A tibble: 7 × 3
## sibsp Survived DidnotSurvive
## <dbl> <dbl> <dbl>
## 1 0 0.371 0.629
## 2 1 0.525 0.475
## 3 2 0.444 0.556
## 4 3 0.375 0.625
## 5 4 0.136 0.864
## 6 5 0 1
## 7 8 0 1
Parch:
65% of passengers with no parents or children onboard did not survive while 35% survived.
40.6% of passengers with 1 parent or child aboard did not survive while 59.4% survived…etc.
# Explore `survived` on the cleaned table with `parch` passed as the target.
explore(titanic_1, survived, target = parch)
# Share of survivors and non-survivors within each parch value.
Prop_Parch <- summarise(
  group_by(titanic_1, parch),
  Survived = mean(survived == 1),
  DidnotSurvive = mean(survived == 0)
)
Prop_Parch
## # A tibble: 7 × 3
## parch Survived DidnotSurvive
## <dbl> <dbl> <dbl>
## 1 0 0.350 0.650
## 2 1 0.594 0.406
## 3 2 0.567 0.433
## 4 3 0.625 0.375
## 5 4 0.2 0.8
## 6 5 0.167 0.833
## 7 6 0 1
# Explore `survived` against `fare` on the cleaned table `titanic_1`, the same
# table used for every other feature; the raw `titanic` table still holds the
# rows with missing values and an outcome that has not been recoded.
titanic_1 %>% explore(survived, target = fare)
# Share of survivors and non-survivors for each distinct fare.
Prop_fare <- titanic_1 %>%
  group_by(fare) %>%
  summarise(Survived = mean(survived == 1), DidnotSurvive = mean(survived == 0))
head(Prop_fare)
## # A tibble: 6 × 3
## fare Survived DidnotSurvive
## <dbl> <dbl> <dbl>
## 1 0 0.25 0.75
## 2 3.17 1 0
## 3 4.01 0 1
## 4 5 0 1
## 5 6.24 0 1
## 6 6.44 0 1
Age:
64.1% of passengers aged 18 did not survive while 35.9% survived.
73.2% of passengers aged 21 did not survive while 26.8% survived…etc.
Both the youngest passenger (about 2 months old) and the oldest passenger (80 years) survived.
# Bar-chart exploration of `survived` with `age` passed as the target.
explore_bar(titanic_1, survived, target = age)
# Share of survivors and non-survivors for each distinct age.
Prop_age <- summarise(
  group_by(titanic_1, age),
  Survived = mean(survived == 1),
  DidnotSurvive = mean(survived == 0)
)
head(Prop_age)
## # A tibble: 6 × 3
## age Survived DidnotSurvive
## <dbl> <dbl> <dbl>
## 1 0.17 1 0
## 2 0.33 0 1
## 3 0.42 1 0
## 4 0.67 1 0
## 5 0.75 0.667 0.333
## 6 0.83 1 0
# Fix the random number stream. NOTE(review): the split below takes rows in
# file order and does not sample, so the seed does not influence it.
set.seed(1000)

# 80/20 split by position: the first 80% of rows (rounded down) are used for
# training and all remaining rows for testing.
titanic_rows <- nrow(titanic_1)
train_size <- floor(titanic_rows * 0.8)
titanic_train_index <- seq_len(train_size)
# Every row that is not in the training set. The previous fractional sequence
# `(titanic_rows * 0.8 + 1):titanic_rows` stopped one step early and dropped
# the last row of the table from the test indices.
titanic_test_index <- setdiff(seq_len(titanic_rows), titanic_train_index)

titanic_train <- titanic_1[titanic_train_index, ]
titanic_test <- titanic_1[titanic_test_index, ]

# The names used by the modelling code below; derived from the computed split
# instead of hard-coded row numbers so they stay correct if the data changes.
train_titanic <- titanic_train
test_titanic <- titanic_test
library(rpart)
# Fit a classification tree for `survived` on the six remaining features,
# using the training rows only.
fit <- rpart(
  formula = survived ~ sex + age + sibsp + parch + fare + embarked,
  data = train_titanic,
  method = "class"
)
# Print the fitted tree's node table.
fit
## n= 834
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 834 373 0 (0.5527578 0.4472422)
## 2) sex=male 511 110 0 (0.7847358 0.2152642)
## 4) age>=10 483 89 0 (0.8157350 0.1842650) *
## 5) age< 10 28 7 1 (0.2500000 0.7500000) *
## 3) sex=female 323 60 1 (0.1857585 0.8142415)
## 6) fare< 10.1625 42 19 0 (0.5476190 0.4523810)
## 12) age>=19.5 28 9 0 (0.6785714 0.3214286) *
## 13) age< 19.5 14 4 1 (0.2857143 0.7142857) *
## 7) fare>=10.1625 281 37 1 (0.1316726 0.8683274)
## 14) sibsp>=3.5 7 1 0 (0.8571429 0.1428571) *
## 15) sibsp< 3.5 274 31 1 (0.1131387 0.8868613) *
plot(fit)
In order to get a better and more readable tree the following libraries are used:
rattle
rpart.plot
RColorBrewer
suppressPackageStartupMessages(library(rattle))
library(rattle)
library(rpart.plot)
library(RColorBrewer)
From the tree below, the most important feature is Sex.
I agree with the titanic slogan, “women and children first”, and it is accurately depicted in the decision tree:
The tree shows that the women and children had a higher rate of survival than the men.
The tree is initially split by sex=male, indicating that this model uses the sex of passengers as a major distinction for prediction.
The tree is then split into two nodes, Node 2 being on the basis that the sex is male and Node 3 that the sex is female.
Note that there were more males(61%) than females (39%).
Node 2:
In node 2, 78% of the male passengers did not survive while 22% survived.
Node 2 is further split by age >= 10 into two nodes (Nodes 4 & 5) with Node 4 being male passengers aged 10 or older and Node 5 being male passengers younger than 10.
In node 4, 82% of the male passengers did not survive and 18% survived while in node 5, 25% of the male passengers did not survive and 75% survived which indicates that the younger male passengers had a higher survival rate than the older male passengers.
Note that node 4 covers 58% and node 5 covers 3% of all passengers in the training set; together they make up the 61% in node 2.
Node 3:
In node 3, 19% of the female passengers did not survive while 81% survived.
Node 3 is further split by fare < 10 into two nodes (Nodes 6 & 7) with Node 6 being female passengers with fares less than 10 and Node 7 being female passengers with fares greater than 10.
In node 6, 55% of the female passengers did not survive and 45% survived while in node 7, 13% of the female passengers did not survive and 87% survived indicating that female passengers with higher fares had a higher survival rate than female passengers with lower fares.
Node 6 covers 5% and node 7 covers 34% of all passengers in the training set; together they make up the 39% in node 3.
Node 6 is further split by age >= 20 into two more (nodes 12 & 13) with node 12 being female passengers aged 20 or older and node 13 being female passengers younger than 20.
In node 12, 68% of the older female passengers did not survive and 32% survived while in node 13, 29% of the younger female passengers did not survive and 71% survived indicating that the younger female passengers had a higher survival rate than the older female passengers.
Node 12 covers 3% and node 13 covers 2% of all passengers in the training set; together they make up the 5% in node 6.
Node 7 is also further split by sibsp>=3.5 into two nodes (nodes 14 & 15) with node 14 being female passengers with 4 or more siblings or spouses onboard (sibsp is a whole number, so sibsp>=3.5 means 4 or more) and node 15 being female passengers with fewer than 4 siblings or spouses onboard.
In node 14, 86% of female passengers with 4 or more siblings or spouses onboard did not survive and 14% survived while in node 15, 11% of female passengers with fewer than 4 siblings or spouses onboard did not survive and 89% survived, indicating that female passengers with fewer siblings or spouses had a higher survival rate.
Node 14 covers 1% and node 15 covers 33% of all passengers in the training set; together they make up the 34% in node 7.
# Draw the fitted tree with the fancyRpartPlot() renderer.
fancyRpartPlot(fit)
# Predict a class label for every passenger in the held-out rows.
Prediction <- predict(object = fit, newdata = test_titanic, type = "class")
# Pair each predicted label with the passenger's sex for reporting.
Prediction_df <- data.frame(
  PassengerSex = test_titanic$sex,
  Survived = Prediction
)
Prediction_df
## PassengerSex Survived
## 1 male 0
## 2 male 0
## 3 male 0
## 4 male 0
## 5 male 0
## 6 male 0
## 7 female 0
## 8 female 0
## 9 male 0
## 10 female 1
## 11 male 0
## 12 male 0
## 13 male 0
## 14 female 1
## 15 male 0
## 16 male 0
## 17 male 0
## 18 female 0
## 19 male 0
## 20 male 0
## 21 male 0
## 22 male 0
## 23 female 0
## 24 male 0
## 25 male 0
## 26 female 1
## 27 female 0
## 28 male 0
## 29 female 1
## 30 male 0
## 31 male 0
## 32 male 0
## 33 male 0
## 34 male 0
## 35 male 0
## 36 male 1
## 37 female 1
## 38 male 0
## 39 female 0
## 40 male 0
## 41 male 0
## 42 female 1
## 43 female 1
## 44 male 0
## 45 female 1
## 46 male 0
## 47 male 0
## 48 female 1
## 49 female 0
## 50 male 0
## 51 female 1
## 52 female 0
## 53 male 0
## 54 male 0
## 55 male 0
## 56 male 0
## 57 male 0
## 58 female 0
## 59 male 0
## 60 male 0
## 61 female 0
## 62 male 1
## 63 male 0
## 64 male 0
## 65 female 0
## 66 male 0
## 67 male 0
## 68 male 0
## 69 female 0
## 70 female 0
## 71 male 0
## 72 male 0
## 73 female 0
## 74 male 1
## 75 male 1
## 76 female 1
## 77 female 1
## 78 female 1
## 79 male 1
## 80 male 1
## 81 male 1
## 82 male 0
## 83 male 0
## 84 female 1
## 85 male 0
## 86 male 0
## 87 male 0
## 88 male 1
## 89 female 1
## 90 female 1
## 91 male 0
## 92 male 0
## 93 male 0
## 94 male 0
## 95 male 0
## 96 female 0
## 97 male 0
## 98 male 0
## 99 female 1
## 100 male 0
## 101 male 0
## 102 male 0
## 103 male 0
## 104 male 0
## 105 male 0
## 106 male 1
## 107 male 1
## 108 male 1
## 109 male 1
## 110 female 1
## 111 female 1
## 112 male 0
## 113 male 0
## 114 female 1
## 115 female 1
## 116 male 0
## 117 female 1
## 118 male 0
## 119 male 0
## 120 male 0
## 121 male 0
## 122 male 0
## 123 male 0
## 124 female 0
## 125 male 0
## 126 female 1
## 127 female 1
## 128 female 1
## 129 male 0
## 130 male 0
## 131 male 0
## 132 male 0
## 133 male 0
## 134 male 0
## 135 male 0
## 136 female 1
## 137 male 1
## 138 male 0
## 139 female 1
## 140 female 1
## 141 male 0
## 142 female 1
## 143 male 0
## 144 male 0
## 145 male 0
## 146 male 0
## 147 female 0
## 148 male 0
## 149 male 0
## 150 female 0
## 151 male 0
## 152 male 0
## 153 female 1
## 154 female 1
## 155 male 0
## 156 male 0
## 157 male 0
## 158 male 0
## 159 male 0
## 160 male 0
## 161 male 0
## 162 male 0
## 163 male 1
## 164 female 1
## 165 male 0
## 166 male 0
## 167 male 0
## 168 male 0
## 169 male 1
## 170 female 1
## 171 female 1
## 172 male 0
## 173 female 1
## 174 female 0
## 175 male 0
## 176 male 0
## 177 female 1
## 178 male 0
## 179 female 1
## 180 male 0
## 181 male 0
## 182 male 0
## 183 male 0
## 184 female 1
## 185 male 0
## 186 male 0
## 187 female 1
## 188 male 0
## 189 male 0
## 190 female 1
## 191 male 0
## 192 male 0
## 193 male 0
## 194 female 0
## 195 male 0
## 196 male 0
## 197 male 0
## 198 female 0
## 199 male 0
## 200 male 0
## 201 male 0
## 202 male 0
## 203 male 0
## 204 female 1
## 205 male 0
## 206 female 1
## 207 male 0
## 208 male 0
## 209 male 0
# Save the predictions to the working directory, leaving out the row names.
write.csv(x = Prediction_df, file = "Titanicdtree.csv", row.names = FALSE)