library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)

# Fetch the Titanic passenger table (CSV) straight from the
# datasciencedojo/datasets repository on GitHub into a base data.frame.
titanic <- read.csv(
  file = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
)

# Print a compact per-column summary (type and first values) of the table.
str(object = titanic)
## 'data.frame':    891 obs. of  12 variables:
##  $ PassengerId: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Survived   : int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Pclass     : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Name       : chr  "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
##  $ Sex        : chr  "male" "female" "female" "female" ...
##  $ Age        : num  22 38 26 35 35 NA 54 2 27 14 ...
##  $ SibSp      : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch      : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Ticket     : chr  "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
##  $ Fare       : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Cabin      : chr  "" "C85" "" "C123" ...
##  $ Embarked   : chr  "S" "C" "S" "S" ...
# Prepare the data for plotting in a single pipeline:
#   1. drop the identifier / free-text columns that none of the plots use;
#   2. turn empty strings into NA, but only in character columns (comparing
#      the whole frame against "" would needlessly coerce the numeric
#      columns to text);
#   3. drop every row that still contains an NA in any remaining column.
# NOTE(review): step 3 also removes passengers whose only missing value is
# Age, so the plots that never use Age are built from the reduced sample
# too -- confirm this is intended.
titanic <- titanic %>%
  select(-PassengerId, -Name, -Ticket, -Cabin) %>%
  mutate(across(where(is.character), function(col) na_if(col, ""))) %>%
  na.omit()
# 1. Bar charts

# Passenger distribution by gender: one pink bar per value of Sex, with
# bar height equal to the number of rows (passengers) in that group.
ggplot(data = titanic, mapping = aes(x = Sex)) +
  ggtitle("Pass dist. by Gender") +
  xlab("Gender") +
  ylab("Nmb of Pass.") +
  geom_bar(fill = "pink")

# Survival by passenger class: passenger counts per Pclass, with the
# Survived groups drawn as side-by-side (dodged) bars.
ggplot(
  data = titanic,
  mapping = aes(x = factor(Pclass), fill = factor(Survived))
) +
  labs(
    title = "Survl by Pass' class.",
    x = "Class",
    y = "Num of Pass.",
    fill = "Survived"
  ) +
  geom_bar(position = "dodge")

# Survival by gender: passenger counts per Sex, with the Survived groups
# drawn as side-by-side (dodged) bars.
ggplot(
  data = titanic,
  mapping = aes(x = Sex, fill = factor(Survived))
) +
  ggtitle("Survive by Gender/Sex") +
  xlab("Gender") +
  ylab("Num of Pass.") +
  labs(fill = "Survived") +
  geom_bar(position = "dodge")

# 2. Box plots

# Fare distribution by survival: one box per Survived value. The manual
# fill colors are assigned in factor-level order (gray first, then blue).
ggplot(
  data = titanic,
  mapping = aes(x = factor(Survived), y = Fare, fill = factor(Survived))
) +
  theme_minimal() +
  geom_boxplot() +
  ggtitle("FD by Survival") +
  xlab("Survived") +
  ylab("Fare") +
  scale_fill_manual(values = c("gray", "blue"))

# Age distribution by passenger class: one box per Pclass value. The manual
# fill colors are assigned in factor-level order (blue, gray, green).
ggplot(
  data = titanic,
  mapping = aes(x = factor(Pclass), y = Age, fill = factor(Pclass))
) +
  theme_minimal() +
  geom_boxplot() +
  ggtitle("AD by Class") +
  xlab("Class") +
  ylab("Age") +
  scale_fill_manual(values = c("blue", "gray", "green"))

# 3. Scatter plots
# Age versus fare, with points colored by survival outcome.
# Points are semi-transparent (alpha = 0.6) so overlapping passengers stay
# visible. The manual colors are assigned in factor-level order (gray
# first, then blue).
ggplot(titanic, aes(x = Age, y = Fare, color = factor(Survived))) +
  geom_point(alpha = 0.6) +
  # Black linear-model trend line without a confidence band. The formula is
  # spelled out so the fitted model is explicit and ggplot2 does not print
  # its "using formula = 'y ~ x'" message on every run.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black") +
  scale_color_manual(values = c("gray", "blue")) +
  labs(
    title = "Age vs Fare || Survival",
    x = "Age",
    y = "Fare",
    color = "Survived"
  ) +
  theme_minimal()

# Age versus fare, with points colored by passenger class.
# Points are semi-transparent (alpha = 0.6) so overlapping passengers stay
# visible. The manual colors are assigned in factor-level order (blue,
# gray, green).
ggplot(titanic, aes(x = Age, y = Fare, color = factor(Pclass))) +
  geom_point(alpha = 0.6) +
  # Black linear-model trend line without a confidence band. The formula is
  # spelled out so the fitted model is explicit and ggplot2 does not print
  # its "using formula = 'y ~ x'" message on every run.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black") +
  scale_color_manual(values = c("blue", "gray", "green")) +
  labs(
    title = "Age vs Fare || Class",
    x = "Age",
    y = "Fare",
    color = "Class"
  ) +
  theme_minimal()