library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Loads the tidyverse packages for data manipulation and plotting.
# Import the Titanic workbook; the sheet and column types are left to readxl's
# defaults.
# NOTE(review): the preview below shows every data column parsed as <chr>,
# including age and fare — confirm whether numeric conversion is wanted.
titanicdata <- readxl::read_excel(path = "TitanicData.xlsx")
## New names:
## • `` -> `...1`
# Preview the first six rows to sanity-check the import.
head(titanicdata, n = 6L)
## # A tibble: 6 × 10
## ...1 gender age class embarked country fare sibsp parch survived
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 male 42 3rd Southampton United States 7.11 0 0 no
## 2 2 male 13 3rd Southampton United States 20.05 0 2 no
## 3 3 male 16 3rd Southampton United States 20.05 1 1 no
## 4 4 female 39 3rd Southampton England 20.05 1 1 yes
## 5 5 female 16 3rd Southampton Norway 7.13 0 0 yes
## 6 6 male 25 3rd Southampton United States 7.13 0 0 yes
Reads the TitanicData Excel workbook into a tibble (data frame) and previews its first six rows.
# Tally people per class/survival combination, then express each tally as a
# share of its class total. The result stays grouped by `class`.
pclass_survival <- titanicdata %>%
  count(class, survived) %>%
  group_by(class) %>%
  mutate(percent = n / sum(n))

# Print the summary table.
pclass_survival
## # A tibble: 14 × 4
## # Groups: class [7]
## class survived n percent
## <chr> <chr> <int> <dbl>
## 1 1st no 123 0.380
## 2 1st yes 201 0.620
## 3 2nd no 166 0.585
## 4 2nd yes 118 0.415
## 5 3rd no 528 0.745
## 6 3rd yes 181 0.255
## 7 deck crew no 23 0.348
## 8 deck crew yes 43 0.652
## 9 engineering crew no 253 0.781
## 10 engineering crew yes 71 0.219
## 11 restaurant staff no 66 0.957
## 12 restaurant staff yes 3 0.0435
## 13 victualling crew no 337 0.782
## 14 victualling crew yes 94 0.218
Groups everyone aboard by class and survival status, counts how many people are in each group, and calculates each outcome's share within its class (the "yes" rows give the class survival rate).
# Tally people per class/gender/survival combination, then express each tally
# as a share of its class-and-gender total. The result stays grouped by
# `class` and `gender`.
pclass_sex_survival <- titanicdata %>%
  count(class, gender, survived) %>%
  group_by(class, gender) %>%
  mutate(percent = n / sum(n))
Groups everyone aboard by class, gender, and survival status, counts the number in each group, and calculates each outcome's share within each class-and-gender combination.
# Bar chart of the survival share per class, one panel per gender.
# Only the "yes" rows are plotted; the value is lowercase in the source data.
pclass_sex_survival_graph <- ggplot(
  data = filter(pclass_sex_survival, survived == "yes"),
  mapping = aes(x = class, y = percent, fill = class)
) +
  geom_col() +
  facet_grid(. ~ gender)
Filters the data to include only survivors, creates a bar chart showing the survival rate for each class, and splits the chart into one facet per gender.
# Final presentation: grey base theme with axis titles and legend removed,
# y-axis shown as percentages, and descriptive title/subtitle/caption.
# theme() must follow theme_grey(), which would otherwise reset the overrides.
pclass_sex_survival_graph +
  theme_grey() +
  theme(
    legend.position = "none",
    axis.title = element_blank()
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Titanic Survival Rates",
    subtitle = "Percent by Gender and Cabin Class",
    caption = "Source: Encyclopedia Titanica"
  )
Styles the plot: adds a title, subtitle, and data-source caption; formats the y-axis as percentages; applies the grey theme; and removes the axis titles and hides the legend.