library('dplyr') # data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library('tidyr') # data manipulation
library('readr') # data input
library('stringr') # string manipulation
library('forcats') # factor manipulation
library('modelr') # modelling helpers
library('ggplot2')
# Load the training data (presumably the Kaggle Titanic train.csv, judging by
# the column names used here -- confirm).
# NOTE(review): the path is machine-specific; point `train_path` at your own
# copy of the file before running.
train_path <- "C:/Users/Lenovo/Downloads/train.csv"
train <- read.csv(train_path)

# View() opens a GUI data viewer, so only call it in an interactive session.
# This keeps the script runnable under Rscript / knitr on a headless machine.
if (interactive()) {
  View(train)
}

# Convert the categorical columns to factors so they are treated as discrete
# categories rather than numbers or free text.
train <- train %>%
  mutate(
    Survived = factor(Survived),
    Pclass = factor(Pclass),
    Embarked = factor(Embarked),
    Sex = factor(Sex)
  )
# Exploratory plots: how each predictor relates to Survived.
# Each plot is built once and rendered once. Rendering uses explicit print()
# calls so the figures are also drawn when the script is run via source(),
# where top-level auto-printing does not happen.

# Age distribution per survival outcome, using 1-unit bins.
# na.rm = TRUE drops rows with a missing Age silently instead of with a warning.
p_age <- ggplot(train) +
  geom_freqpoly(
    mapping = aes(x = Age, colour = Survived),
    binwidth = 1,
    na.rm = TRUE
  ) +
  theme(legend.position = "right")

# Share of each survival outcome within each sex (bars stacked to 100%).
p_sex <- ggplot(train, mapping = aes(x = Sex, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(x = "Sex") +
  scale_fill_discrete(name = "Surv")

# Share of each survival outcome within each passenger class.
p_class <- ggplot(
  train,
  mapping = aes(x = Pclass, fill = Survived, colour = Survived)
) +
  geom_bar(position = "fill") +
  labs(x = "Pclass") +
  theme(legend.position = "none")

# Share of each survival outcome within each value of Embarked.
p_emb <- ggplot(train, aes(x = Embarked, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(x = "Embarked") +
  theme(legend.position = "none")

# Share of each survival outcome for each SibSp value.
p_sib <- ggplot(train, aes(x = SibSp, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(x = "SibSp") +
  theme(legend.position = "none")

# Share of each survival outcome for each Parch value.
p_par <- ggplot(train, aes(x = Parch, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(x = "Parch") +
  theme(legend.position = "none")

# Fare distribution per survival outcome on a log10 axis.
# Fares that are zero (or missing) have no finite log10 value; ggplot2 would
# drop them anyway, but with warnings, so they are filtered out explicitly.
p_fare <- ggplot(filter(train, Fare > 0)) +
  geom_freqpoly(
    mapping = aes(x = Fare, colour = Survived),
    binwidth = 0.05
  ) +
  scale_x_log10() +
  theme(legend.position = "none")

# Render the plots.
print(p_age)
print(p_sex)
print(p_fare)
print(p_class)
print(p_emb)
print(p_sib)
print(p_par)

# Overall survival split, drawn as a pie chart.
# Survived, Pclass, Embarked and Sex are already converted to factors right
# after `train` is loaded, so the conversion is not repeated here.
survived_summary <- train %>%
  count(Survived, name = "count") %>%
  mutate(percentage = count / sum(count) * 100)

# A single stacked bar in polar coordinates (theta = "y") becomes a pie;
# the labels are placed at the middle of each slice.
p1 <- ggplot(survived_summary, aes(x = "", y = percentage, fill = Survived)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  labs(title = "Survival Distribution") +
  theme_void()
# Passenger split by sex, drawn as a pie chart.
sex_summary <- train %>%
  count(Sex, name = "countx") %>%
  mutate(percentagex = countx / sum(countx) * 100)

# A single stacked bar in polar coordinates (theta = "y") becomes a pie.
# width = 1 matches the other pie charts in this script (p1, p3).
p2 <- ggplot(sex_summary, aes(x = "", y = percentagex, fill = Sex)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  geom_text(
    aes(label = paste0(round(percentagex, 1), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  theme_void() +
  labs(title = "Sex Distribution")
# Sex split within each survival outcome, one pie per outcome.
# The percentage is computed once, within each Survived group, so the slices
# of every facet sum to 100%. The result is ungrouped afterwards so that no
# grouping leaks into later operations on this table.
sex_survived_summary <- train %>%
  count(Sex, Survived, name = "count") %>%
  group_by(Survived) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup()

# One stacked bar per facet in polar coordinates (theta = "y") gives one pie
# per survival outcome; labels are placed at the middle of each slice.
p3 <- ggplot(sex_survived_summary, aes(x = "", y = percentage, fill = Sex)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  labs(title = "Distribution of Sex by Survival Status") +
  theme_void() +
  facet_wrap(~Survived)
# Render the pie charts. Explicit print() calls are used because top-level
# auto-printing does not happen when the script is run via source().
print(p1)

print(p2)

print(p3)
