# Load the training set from the working directory and preview its first rows.
train <- read.csv(file = "train.csv")
head(x = train)
## PassengerId Survived Pclass
## 1 1 0 3
## 2 2 1 1
## 3 3 1 3
## 4 4 1 1
## 5 5 0 3
## 6 6 0 3
## Name Sex Age SibSp Parch
## 1 Braund, Mr. Owen Harris male 22 1 0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38 1 0
## 3 Heikkinen, Miss. Laina female 26 0 0
## 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0
## 5 Allen, Mr. William Henry male 35 0 0
## 6 Moran, Mr. James male NA 0 0
## Ticket Fare Cabin Embarked
## 1 A/5 21171 7.2500 S
## 2 PC 17599 71.2833 C85 C
## 3 STON/O2. 3101282 7.9250 S
## 4 113803 53.1000 C123 S
## 5 373450 8.0500 S
## 6 330877 8.4583 Q
library('dplyr')
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library('tidyr')
## Warning: package 'tidyr' was built under R version 4.3.3
library('readr')
## Warning: package 'readr' was built under R version 4.3.3
library('stringr')
## Warning: package 'stringr' was built under R version 4.3.3
library('forcats')
## Warning: package 'forcats' was built under R version 4.3.3
library('modelr')
## Warning: package 'modelr' was built under R version 4.3.3
library('ggplot2')
## Warning: package 'ggplot2' was built under R version 4.3.3
# Recode the four categorical columns as factors in a single pass so that
# summary() tabulates them and ggplot2 treats them as discrete groups,
# then preview the result.
train <- train %>%
  mutate(across(c(Survived, Pclass, Embarked, Sex), factor))
head(train)
## PassengerId Survived Pclass
## 1 1 0 3
## 2 2 1 1
## 3 3 1 3
## 4 4 1 1
## 5 5 0 3
## 6 6 0 3
## Name Sex Age SibSp Parch
## 1 Braund, Mr. Owen Harris male 22 1 0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38 1 0
## 3 Heikkinen, Miss. Laina female 26 0 0
## 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0
## 5 Allen, Mr. William Henry male 35 0 0
## 6 Moran, Mr. James male NA 0 0
## Ticket Fare Cabin Embarked
## 1 A/5 21171 7.2500 S
## 2 PC 17599 71.2833 C85 C
## 3 STON/O2. 3101282 7.9250 S
## 4 113803 53.1000 C123 S
## 5 373450 8.0500 S
## 6 330877 8.4583 Q
# Per-column summary of the recoded data. In the output below, Age reports
# 177 NA's and Embarked shows 2 rows under an unlabelled (blank) level.
summary(train)
## PassengerId Survived Pclass Name Sex
## Min. : 1.0 0:549 1:216 Length:891 female:314
## 1st Qu.:223.5 1:342 2:184 Class :character male :577
## Median :446.0 3:491 Mode :character
## Mean :446.0
## 3rd Qu.:668.5
## Max. :891.0
##
## Age SibSp Parch Ticket
## Min. : 0.42 Min. :0.000 Min. :0.0000 Length:891
## 1st Qu.:20.12 1st Qu.:0.000 1st Qu.:0.0000 Class :character
## Median :28.00 Median :0.000 Median :0.0000 Mode :character
## Mean :29.70 Mean :0.523 Mean :0.3816
## 3rd Qu.:38.00 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :80.00 Max. :8.000 Max. :6.0000
## NA's :177
## Fare Cabin Embarked
## Min. : 0.00 Length:891 : 2
## 1st Qu.: 7.91 Class :character C:168
## Median : 14.45 Mode :character Q: 77
## Mean : 32.20 S:644
## 3rd Qu.: 31.00
## Max. :512.33
##
# Exploratory plots of Survived against each predictor in `train`.
# Each plot is only stored here; it is drawn when the object is printed.
# The bar charts use position = "fill", so every bar is scaled to height 1
# and shows the proportion of each Survived level rather than raw counts.
# geom_bar() already defaults to stat = "count", so it is not restated.

# Age: counts per 1-year bin, one line per Survived level.
# summary(train) reports NA's in Age; ggplot2 drops those rows with a warning.
p_age <- ggplot(train) +
  geom_freqpoly(mapping = aes(x = Age, color = Survived), binwidth = 1) +
  theme(legend.position = "right")

# Sex: the only bar chart that keeps its legend (titled "Surv").
p_sex <- ggplot(train, mapping = aes(x = Sex, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(x = "Sex") +
  scale_fill_discrete(name = "Surv")

# Passenger class: Survived is mapped to both fill and outline colour.
p_class <- ggplot(
  train,
  mapping = aes(x = Pclass, fill = Survived, colour = Survived)
) +
  geom_bar(position = "fill") +
  labs(x = "Pclass") +
  theme(legend.position = "none")

# Port of embarkation.
p_emb <- ggplot(train, aes(Embarked, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(x = "Embarked") +
  theme(legend.position = "none")

# Siblings/spouses aboard.
p_sib <- ggplot(train, aes(SibSp, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(x = "SibSp") +
  theme(legend.position = "none")

# Parents/children aboard.
p_par <- ggplot(train, aes(Parch, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(x = "Parch") +
  theme(legend.position = "none")

# Fare on a log10 x axis. The scale transform is applied before binning, so
# binwidth = 0.05 is in log10 units. summary(train) shows a minimum Fare of 0,
# which maps to -Inf on this scale; such rows are dropped with a warning.
p_fare <- ggplot(train) +
  geom_freqpoly(mapping = aes(Fare, color = Survived), binwidth = 0.05) +
  scale_x_log10() +
  theme(legend.position = "none")
# Display the plots built above; evaluating a ggplot object at top level
# renders it. The display order differs from the definition order (p_fare
# is shown third).
p_age
Fig. 2
p_sex
Fig. 2
p_fare
Fig. 2
p_class
Fig. 2
p_emb
Fig. 2
p_sib
Fig. 2
p_par
Fig. 2
# Pie chart of passenger counts by sex: one stacked bar (constant x = "")
# wrapped into polar coordinates with theta = "y".
# custom_col is unnamed, so its colours are matched to the fill levels in order.
custom_col <- c("blue", "yellow")
pie <- ggplot(train, aes(x = "", fill = factor(Sex))) +
  geom_bar(width = 1) +
  theme(
    axis.line = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 22)
  ) +
  # NOTE(review): the legend is titled "class" although the fill is Sex --
  # confirm whether this label is intended.
  labs(
    fill = "class",
    x = NULL,
    y = NULL,
    title = "Pie Chart of Gender "
  ) +
  coord_polar(theta = "y", start = 0) +
  # The trailing `+` above was missing, so the manual fill scale was evaluated
  # as a separate statement (printing its ggproto object) and the custom
  # colours were never attached to the plot.
  scale_fill_manual(values = custom_col)
pie