library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
# Read the Titanic passenger file into a tibble; the first row of the CSV
# supplies the column names.
# NOTE: the path below is absolute and specific to one machine.
titanic_data <- read_csv(
  file = "C:/Users/Shamp/OneDrive/Desktop/Data 712/titanic_data.csv",
  col_names = TRUE
)
## Rows: 891 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Name, Sex, Ticket, Cabin, Embarked
## dbl (7): PassengerId, Survived, Pclass, Age, SibSp, Parch, Fare
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the import.
head(titanic_data, n = 6L)
## # A tibble: 6 × 12
##   PassengerId Survived Pclass Name    Sex     Age SibSp Parch Ticket  Fare Cabin
##         <dbl>    <dbl>  <dbl> <chr>   <chr> <dbl> <dbl> <dbl> <chr>  <dbl> <chr>
## 1           1        0      3 Braund… male     22     1     0 A/5 2…  7.25 <NA> 
## 2           2        1      1 Cuming… fema…    38     1     0 PC 17… 71.3  C85  
## 3           3        1      3 Heikki… fema…    26     0     0 STON/…  7.92 <NA> 
## 4           4        1      1 Futrel… fema…    35     1     0 113803 53.1  C123 
## 5           5        0      3 Allen,… male     35     0     0 373450  8.05 <NA> 
## 6           6        0      3 Moran,… male     NA     0     0 330877  8.46 <NA> 
## # ℹ 1 more variable: Embarked <chr>
# Install tidyverse only if it is not already installed.
# (tidyverse includes readr, dplyr, ggplot2, etc.)
# Calling install.packages() unconditionally re-downloads on every run and
# warns when the package is already attached, so check for it first.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
# Load libraries
library(tidyverse)
# Mean ticket price (Fare) for each sex; missing fares are ignored.
summarise(
  group_by(titanic_data, Sex),
  Average_Fare = mean(Fare, na.rm = TRUE)
)
## # A tibble: 2 × 2
##   Sex    Average_Fare
##   <chr>         <dbl>
## 1 female         44.5
## 2 male           25.5
# Mean ticket price (Fare) for each passenger class (Pclass); missing fares
# are ignored.
summarise(
  group_by(titanic_data, Pclass),
  Average_Fare = mean(Fare, na.rm = TRUE)
)
## # A tibble: 3 × 2
##   Pclass Average_Fare
##    <dbl>        <dbl>
## 1      1         84.2
## 2      2         20.7
## 3      3         13.7
# Survival rate for each sex, computed as the mean of the Survived column
# (missing values are ignored).
summarise(
  group_by(titanic_data, Sex),
  Survival_Rate = mean(Survived, na.rm = TRUE)
)
## # A tibble: 2 × 2
##   Sex    Survival_Rate
##   <chr>          <dbl>
## 1 female         0.742
## 2 male           0.189
# Survival rate for each passenger class (Pclass), computed as the mean of
# the Survived column (missing values are ignored).
summarise(
  group_by(titanic_data, Pclass),
  Survival_Rate = mean(Survived, na.rm = TRUE)
)
## # A tibble: 3 × 2
##   Pclass Survival_Rate
##    <dbl>         <dbl>
## 1      1         0.630
## 2      2         0.473
## 3      3         0.242
# Boxplot of ticket price (Fare) for each sex, with one fill colour per sex.
ggplot(
  data = titanic_data,
  mapping = aes(x = Sex, y = Fare, fill = Sex)
) +
  geom_boxplot() +
  labs(title = "Distribution of Fare by Sex")

# Survival rate by sex and passenger class (crosstab), highest rate first.
# `.groups = "drop"` returns an ungrouped tibble, so no leftover grouping by
# Sex is carried forward and summarise() no longer prints its regrouping
# message.

titanic_data %>%
  group_by(Sex, Pclass) %>%
  summarise(Survival_Rate = mean(Survived, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Survival_Rate))
## # A tibble: 6 × 3
##   Sex    Pclass Survival_Rate
##   <chr>   <dbl>         <dbl>
## 1 female      1         0.968
## 2 female      2         0.921
## 3 female      3         0.5  
## 4 male        1         0.369
## 5 male        2         0.157
## 6 male        3         0.135
# ANOVA test for fare differences by sex and passenger class.
# Pclass is read in as a number (1, 2, 3). Wrapping it in factor() makes
# aov() compare the three classes as categories (2 Df) instead of fitting a
# single linear slope across the class codes (1 Df).
# Terms are tested sequentially, so Sex is assessed before class.
# Re-run this chunk to refresh the printed table and any figures quoted
# from it.

anova_result <- aov(Fare ~ Sex + factor(Pclass), data = titanic_data)
summary(anova_result)
##              Df  Sum Sq Mean Sq F value   Pr(>F)    
## Sex           1   73066   73066   43.05 9.06e-11 ***
## Pclass        1  617551  617551  363.85  < 2e-16 ***
## Residuals   888 1507182    1697                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Titanic Data Analysis: Money, Class, and Survival

In April 1912, the Titanic, the world’s most luxurious ship, embarked on its maiden voyage carrying more than 2,200 passengers and crew. Each passenger had their own dreams and destinations, but when the ship struck an iceberg, those dreams turned into a fight for survival. Not everyone had an equal chance to live. Gender, social class, and the price of their tickets played an important role in determining who survived. This analysis explores how these factors—money, class, and gender—shaped the fate of the Titanic’s passengers.

Gender and Ticket Prices: Why Did Women Pay More?

The data reveals that women paid an average fare of $44.48, while men paid only $25.52. At first glance, this difference seems surprising. Why would women pay more? The answer lies in the social norms and travel habits of the time. Wealthy families and single women often traveled in First Class, enjoying spacious cabins, fine dining, and luxurious amenities. Many of these passengers were upper-class women, accompanied by their families or maids. On the other hand, a large number of men traveled in Third Class, paying the lowest fares. These men were often immigrants or laborers seeking a new life in America.

The boxplot comparing ticket prices by gender shows that women generally paid higher fares than men. This is likely because more women traveled in First Class, where tickets were significantly more expensive. However, it is worth noting that some wealthy men also purchased high-priced tickets, contributing to the upper range of the fare distribution. The data also includes outliers, with a few passengers paying over $500, likely those who booked the most luxurious First-Class suites. In contrast, most men paid fares between $0 and $50, suggesting that the majority traveled in Third Class, where accommodations were modest and survival chances were lower.

Class and Ticket Prices: A First-Class Ticket to Survival

The price of a Titanic ticket was closely tied to social class. First-Class passengers paid an average of $84.15, enjoying unparalleled luxury. Second-Class passengers paid about $20.66, with decent but less luxurious accommodations. Third-Class passengers paid the least, around $13.67, but they were confined to cramped quarters below deck.

However, ticket prices were not just about comfort; they also influenced survival. When the Titanic sank, First-Class passengers had better access to lifeboats, while many Third-Class passengers were trapped below deck due to locked gates and delayed escape routes. This disparity in access to lifeboats meant that survival was heavily influenced by class.

Survival Rates: Gender and Class as Lifesavers

The Titanic’s sinking followed the “Women and Children First” protocol, meaning gender and class played a significant role in who survived. Overall, 74% of women survived, compared to only 19% of men. First-Class passengers had the highest survival rate at 63%, while only 24% of Third-Class passengers made it out alive. This stark difference shows that a wealthy woman had a far better chance of survival than a poor man.
When we break down survival rates by both gender and class, the pattern becomes even more striking. First-Class women had a remarkable 97% survival rate, meaning nearly all of them made it onto lifeboats. Second-Class women also fared well, with a 92% survival rate. However, Third-Class women struggled to escape the lower decks, and only 50% survived. For men, the situation was far worse. First-Class men had a 37% survival rate, as many gave up their seats for women and children. Second-Class men had only a 16% chance of survival, and Third-Class men had the lowest survival rate at just 13%. Tragically, many of these men were trapped below deck and never made it out.

In short, First-Class women were almost guaranteed a spot on a lifeboat, while Third-Class men faced nearly insurmountable odds.

Statistical Confirmation: Ticket Prices and Survival

To confirm the relationship between ticket prices, gender, and class, an ANOVA test was conducted. The results showed clear differences. Gender significantly impacted fare prices, with a p-value of 9.06e-11, confirming that women generally paid more than men. However, class had an even stronger effect, with a p-value of less than 2e-16, showing that ticket prices were primarily determined by whether a passenger was in First, Second, or Third Class.

The F-value for class (363.85) was much higher than for gender (43.05), indicating that while women did pay more on average, the biggest price gap was between First-Class and Third-Class passengers. As the survival rates above show, paying more for a First-Class ticket did not just mean more luxury; it also meant a better chance of survival.

Conclusion: Lessons from the Titanic

The Titanic’s story is more than just a tragic shipwreck; it is a lesson in privilege, sacrifice, and survival. Wealth played a crucial role, as First-Class passengers had the best access to lifeboats. Gender was also a major factor, with women prioritized for rescue while most men stayed behind. Survival was not just about luck; it was determined by social class and the rules of the time.

The Titanic disaster exposed how society valued wealth and gender in life-or-death situations. Today, we remember not only the tragedy but also the lessons it taught us about human nature, sacrifice, and inequality.