library('dplyr') # data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library('tidyr') # data manipulation
library('readr') # data input
library('stringr') # string manipulation
library('forcats') # factor manipulation
library('modelr') # modelling helpers that work with the pipe
library('ggplot2')
# Load the passenger training data (columns used below suggest the Kaggle
# Titanic set -- confirm) and convert the categorical columns to factors so
# that ggplot2 treats them as discrete groups.
# NOTE(review): the path is specific to one machine; it is kept unchanged so
# the script still finds the same file.
train <- read.csv("C:/Users/Lenovo/Downloads/train.csv")

# View() opens a GUI data viewer; only call it when someone is there to look
# at it, so batch runs (Rscript, knitting) do not fail or block on it.
if (interactive()) {
  View(train)
}

train <- train %>% mutate(
  Survived = factor(Survived),
  Pclass = factor(Pclass),
  Embarked = factor(Embarked),
  Sex = factor(Sex)
)
# Exploratory plots of `Survived` against each predictor in `train`.
# Bar charts use position = "fill", so every bar is scaled to the same height
# and shows the survival split as a proportion rather than a raw count.

# Age: one frequency polygon per survival outcome, bins one unit wide.
p_age <- ggplot(train, aes(x = Age, colour = Survived)) +
  geom_freqpoly(binwidth = 1) +
  theme(legend.position = "right")

# Sex: the only bar chart that keeps its legend (titled "Surv").
p_sex <- ggplot(train, aes(x = Sex, fill = Survived)) +
  geom_bar(position = "fill") +
  xlab("Sex") +
  scale_fill_discrete(name = "Surv")

# Passenger class: colour is mapped in addition to fill, so the bar outlines
# follow the survival outcome as well.
p_class <- ggplot(train, aes(x = Pclass, fill = Survived, colour = Survived)) +
  geom_bar(position = "fill") +
  xlab("Pclass") +
  theme(legend.position = "none")

# Port of embarkation.
p_emb <- ggplot(train, aes(x = Embarked, fill = Survived)) +
  geom_bar(position = "fill") +
  xlab("Embarked") +
  theme(legend.position = "none")

# SibSp column.
p_sib <- ggplot(train, aes(x = SibSp, fill = Survived)) +
  geom_bar(position = "fill") +
  xlab("SibSp") +
  theme(legend.position = "none")

# Parch column.
p_par <- ggplot(train, aes(x = Parch, fill = Survived)) +
  geom_bar(position = "fill") +
  xlab("Parch") +
  theme(legend.position = "none")

# Fare: frequency polygons on a log10 x axis; the 0.05 bin width is applied
# on the transformed (log10) scale.
p_fare <- ggplot(train, aes(x = Fare, colour = Survived)) +
  geom_freqpoly(binwidth = 0.05) +
  scale_x_log10() +
  theme(legend.position = "none")

# Render the survival plots. Each bare name relies on top-level auto-printing
# to draw the plot. The `##` lines are warnings recorded from an earlier run.

# 177 rows have a non-finite Age (presumably missing values -- confirm) and
# are dropped by the binning stat.
p_age
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_bin()`).

p_sex

# The log10 x scale turns some Fare values into infinities (presumably fares
# of 0 -- confirm); those 15 rows are dropped before binning.
p_fare
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_bin()`).

p_class

p_emb

p_sib

p_par

# Second rendering pass over the survival plots.
# p_age, p_sex, p_class, p_emb, p_sib, p_par and p_fare are already defined
# earlier in this script and `train` has not been modified since, so the
# existing objects are drawn again here instead of being rebuilt from
# identical code. The `##` lines are warnings recorded from an earlier run.

p_age
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_bin()`).

p_sex

p_fare
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_bin()`).

p_class

p_emb

p_sib

p_par

# Make sure the categorical columns of `train` are factors before the
# summaries below; factor() is applied to each listed column in place.
train <- train %>%
  mutate(across(c(Survived, Pclass, Embarked, Sex), factor))
# Number of rows per survival outcome and each outcome's share of all rows,
# expressed as a percentage.
survived_summary <- train %>%
  group_by(Survived) %>%
  summarise(count = n()) %>%
  mutate(percentage = prop.table(count) * 100)

# Pie chart of the survival split: a single stacked bar bent into a circle by
# coord_polar(), with each slice labelled at its midpoint.
p1 <- ggplot(survived_summary, aes(x = "", y = percentage, fill = Survived)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  ggtitle("Survival Distribution") +
  theme_void()
# Number of rows per sex and each sex's share of all rows, as a percentage.
sex_summary <- train %>%
  group_by(Sex) %>%
  summarise(countx = n()) %>%
  mutate(percentagex = prop.table(countx) * 100)

# Pie chart of the sex split, each slice labelled at its midpoint.
# NOTE(review): unlike p1 and p3, no width = 1 is set for the bar here, so the
# default bar width applies -- confirm whether that difference is intended.
p2 <- ggplot(sex_summary, aes(x = "", y = percentagex, fill = Sex)) +
  geom_col() +
  coord_polar(theta = "y") +
  geom_text(
    aes(label = paste0(round(percentagex, 1), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  theme_void() +
  ggtitle("Sex Distribution")
# Row counts for every Sex x Survived combination, plus each sex's share
# within its survival outcome (percentages sum to 100 per `Survived` level).
# The percentage is computed once, per outcome, and the grouping is dropped
# afterwards so later code does not inherit it by accident.
sex_survived_summary <- train %>%
  group_by(Sex, Survived) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(Survived) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup()

# One pie chart per survival outcome (facets), sliced by sex and labelled at
# the midpoint of each slice.
p3 <- ggplot(sex_survived_summary, aes(x = "", y = percentage, fill = Sex)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  labs(title = "Distribution of Sex by Survival Status") +
  theme_void() +
  facet_wrap(~Survived)
# Render the three pie charts: overall survival split (p1), overall sex
# split (p2), and sex split within each survival outcome (p3). Each bare name
# relies on top-level auto-printing to draw the plot.
p1

p2

p3