Source: Kaggle
# Load the Titanic training data from a CSV file (path is relative to the
# working directory) and preview its first six rows.
train <- read.csv("titanic_train.csv")
head(train, n = 6L)
## PassengerId Survived Pclass
## 1 1 0 3
## 2 2 1 1
## 3 3 1 3
## 4 4 1 1
## 5 5 0 3
## 6 6 0 3
## Name Sex Age SibSp Parch
## 1 Braund, Mr. Owen Harris male 22 1 0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38 1 0
## 3 Heikkinen, Miss. Laina female 26 0 0
## 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0
## 5 Allen, Mr. William Henry male 35 0 0
## 6 Moran, Mr. James male NA 0 0
## Ticket Fare Cabin Embarked
## 1 A/5 21171 7.2500 S
## 2 PC 17599 71.2833 C85 C
## 3 STON/O2. 3101282 7.9250 S
## 4 113803 53.1000 C123 S
## 5 373450 8.0500 S
## 6 330877 8.4583 Q
# Bar chart of passenger counts per class, with bars outlined and filled
# by sex.
ggplot(data = train, mapping = aes(x = Pclass, color = Sex, fill = Sex)) +
  geom_bar() +
  theme_bw() +
  labs(x = "Passenger Class", y = "Number of People")
# Survival flag converted to a factor so it can be mapped as a discrete
# aesthetic. It is kept as a standalone vector rather than a column of
# `train`, so plots that map it rely on it staying row-aligned with `train`.
Survived_factor <- as.factor(train$Survived)

# Passenger counts by sex, split by survival, one panel per passenger class.
ggplot(
  data = train,
  mapping = aes(x = Sex, color = Survived_factor, fill = Survived_factor)
) +
  geom_bar() +
  theme_bw() +
  scale_fill_discrete(name = "Survived", labels = c("No", "Yes")) +
  # The outline colour maps the same variable as the fill, so only the
  # fill legend is shown.
  guides(color = "none") +
  labs(y = "Number of People", x = NULL) +
  facet_wrap(~Pclass)
Next, look at how survival varies with age, sex, and passenger class:
# Histogram of Age (bin width 5) filled by survival, with one panel per
# Sex x Pclass combination. Rows whose Age is NA cannot be binned and are
# dropped by stat_bin() with a warning.
ggplot(data = train, mapping = aes(x = Age, fill = Survived_factor)) +
  geom_histogram(binwidth = 5) +
  theme_bw() +
  scale_fill_discrete(name = "Survived", labels = c("No", "Yes")) +
  labs(y = "Number of People") +
  facet_wrap(Sex ~ Pclass)
## Warning: Removed 177 rows containing non-finite values (`stat_bin()`).