Titanic Data Analysis

# Load the Titanic training data and print a plain-text table of summary
# statistics for the cleaned data frame.

# stargazer() is not part of base R; attach its package here so the call below
# does not fail with "could not find function" in a fresh session.
library(stargazer)

# NOTE(review): the path is machine-specific; adjust it if the CSV lives
# elsewhere.
train    <- read.csv("~/Downloads/train.csv", stringsAsFactors = FALSE)

# na.omit() drops every row that has an NA in any column. df_clean is the data
# frame used by the summary table and by the plots further down.
df_clean <- na.omit(train)

# type = "text" prints the table to the console; digits = 1 limits the
# reported statistics to one decimal place.
stargazer(
  df_clean,
  type     = "text",
  title    = "Summary Statistics Table for Titanic",
  digits   = 1,
  align    = TRUE,
  no.space = TRUE
)


Summary Statistics Table for Titanic
========================================
Statistic    N  Mean  St. Dev. Min  Max 
----------------------------------------
PassengerId 714 448.6  259.1    1   891 
Survived    714  0.4    0.5     0    1  
Pclass      714  2.2    0.8     1    3  
Age         714 29.7    14.5   0.4 80.0 
SibSp       714  0.5    0.9     0    5  
Parch       714  0.4    0.9     0    6  
Fare        714 34.7    52.9   0.0 512.3
----------------------------------------

library(ggplot2)

# Box plot of passenger class (Pclass, y axis) for each survival outcome
# (x axis). Survived is recoded on the fly into a factor labelled "No"/"Yes";
# factor() attaches the labels in sorted order of the distinct values.
ggplot(
  data    = df_clean,
  mapping = aes(
    x = factor(Survived, labels = c("No","Yes")),
    y = Pclass
  )
) +
  geom_boxplot(
    outlier.color = "red",
    color         = "darkblue",
    fill          = "lightblue"
  ) +
  theme_minimal() +
  xlab("Survived") +
  ylab("Passenger Class") +
  ggtitle("Class Distribution by Survival Status")

library(ggplot2)

# Histogram of Age, drawn in one panel per value of Survived. The fill colour
# also encodes survival, recoded to "No"/"Yes" for the legend.
ggplot(
  data    = df_clean,
  mapping = aes(
    x    = Age,
    fill = factor(Survived, labels = c("No","Yes"))
  )
) +
  # position = "identity" draws each group's bars from the baseline instead of
  # stacking them; alpha makes the bars semi-transparent.
  geom_histogram(position = "identity", alpha = 0.7, binwidth = 2) +
  # One panel per Survived value; the labeller maps 0/1 to readable strip
  # titles.
  facet_wrap(
    facets   = ~ Survived,
    labeller = as_labeller(c("0" = "Did Not Survive", "1" = "Survived"))
  ) +
  theme_light() +
  ggtitle("Histogram of Age by Survival Status") +
  labs(
    fill = "Survived",
    y    = "Frequency",
    x    = "Age"
  )