# Load the cleaned Titanic passenger records; the relative path is resolved
# against the current working directory.
titanic_data <- read.csv(file = "cleaned_titanic_data.csv")

Created the titanic_data data frame from the CSV file

# Install each package only when it is not already available, so re-running
# the script does not download and reinstall it every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
# Attach the packages used below: tidyverse supplies ggplot2, dplyr and the
# %>% pipe; plotly supplies plot_ly() and layout().
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Installed and Attached Necessary Packages

# Bar chart: number of passengers in each `survived` category.
titanic_data %>%
  ggplot(mapping = aes(x = as.factor(survived))) +
  geom_bar(fill = "purple") +
  labs(
    x = "Survived",
    y = "Count",
    title = "Count of Survived Passengers on the Titanic"
  )

Bar Plot of Survived

# Histogram of passenger ages using bins of width 5.
ggplot(data = titanic_data, mapping = aes(x = age)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "blue") +
  labs(x = "Age", y = "Count", title = "Age Distribution of Passengers")

Histogram of Age

# Box plot comparing the age distribution across `survived` categories.
titanic_data %>%
  ggplot(mapping = aes(x = as.factor(survived), y = age)) +
  geom_boxplot(fill = "orange") +
  labs(
    x = "Survived",
    y = "Age",
    title = "Age Distribution by Survival Status"
  )

Boxplot of Age by Survived

# Violin plot: same comparison as a box plot, but drawn as a density outline
# of age within each `survived` category.
ggplot(
  data = titanic_data,
  mapping = aes(x = as.factor(survived), y = age)
) +
  geom_violin(fill = "orange") +
  labs(
    x = "Survived",
    y = "Age",
    title = "Age Distribution and Density by Survival Status"
  )

Violin Plot of Age by Survived

# Bar chart: number of passengers in each passenger class.
titanic_data %>%
  ggplot(mapping = aes(x = as.factor(pclass))) +
  geom_bar(fill = "red") +
  labs(
    x = "Passenger Class",
    y = "Count",
    title = "Count of Passengers by Class"
  )

Bar Plot of Passenger Class

# Bar chart: number of passengers for each value of `embarked`.
ggplot(data = titanic_data, mapping = aes(x = embarked)) +
  geom_bar(fill = "purple") +
  labs(
    x = "Embarkation Point",
    y = "Count",
    title = "Count of Passengers by Embarkation Point"
  )

Bar Plot of Embarked

# Scatter plot of fare against age, one point per passenger row.
titanic_data %>%
  ggplot(mapping = aes(x = age, y = fare)) +
  geom_point(colour = "orange") +
  labs(x = "Age", y = "Fare", title = "Scatter Plot of Age vs. Fare")

Scatter Plot of Age vs Fare

# Age vs. fare scatter, split into one column of panels per `survived` value.
ggplot(data = titanic_data, mapping = aes(x = age, y = fare)) +
  geom_point(color = "blue") +
  labs(x = "Age", y = "Fare", title = "Age vs. Fare by Survival Status") +
  facet_grid(. ~ survived)

Facet Grid of Age vs. Fare by Survived

# Age vs. fare scatter, split into one column of panels per passenger class.
titanic_data %>%
  ggplot(mapping = aes(x = age, y = fare)) +
  geom_point(colour = "brown") +
  labs(x = "Age", y = "Fare", title = "Age vs. Fare by Passenger Class") +
  facet_grid(. ~ pclass)

Facet Grid of Age vs. Fare by Passenger Class

# Proportional stacked bars: share of each `survived` value within a class
# (position = "fill" rescales every bar to a total height of 1).
# pclass is wrapped in as.factor() so the x axis is discrete, consistent with
# the "Count of Passengers by Class" bar plot and with how `survived` is
# handled in this same call.
ggplot(
  titanic_data,
  aes(x = as.factor(pclass), fill = as.factor(survived))
) +
  geom_bar(position = "fill") +
  xlab("Passenger Class") +
  ylab("Proportion") +
  labs(fill = "Survived") +
  ggtitle("Survival Proportions by Passenger Class")

Stacked Bar Plot of Survival by Passenger Class

# Count passengers in every (pclass, survived) combination, then express each
# count as a percentage of its passenger class and attach a readable label.
titanic_summary <- titanic_data %>%
  group_by(pclass, survived) %>%
  # "drop_last" is what summarise() already did implicitly here. Stating it
  # keeps the result grouped by pclass (required for the per-class sum(count)
  # below) and stops the "has grouped output by 'pclass'" message.
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(
    percentage = count / sum(count) * 100,
    survival_status = ifelse(survived == 1, "Survived", "Did Not Survive")
  )

Calculate counts and percentages for interactive plot

# Convert the summary to a base data.frame (this drops the tibble classes and
# any grouping left on the object).
titanic_summary <- titanic_summary %>%
  as.data.frame()

Convert to data frame

# Interactive stacked bar chart of survival percentages per passenger class.
# - x uses as.factor(pclass) so the classes are drawn as discrete categories
#   rather than positions on a continuous numeric axis.
# - color is mapped to survival_status so the legend reads "Did Not Survive" /
#   "Survived" (matching the hover text and the legend title) instead of the
#   raw 0/1 codes.
# - colors is supplied explicitly (the first two "Set2" colours, so the look
#   is unchanged): with only two levels the default palette lookup calls
#   RColorBrewer::brewer.pal(N, "Set2") with N < 3 and emits a warning.
plot <- plot_ly(
  titanic_summary,
  x = ~as.factor(pclass),
  y = ~percentage,
  type = "bar",
  color = ~survival_status,
  colors = c("#66C2A5", "#FC8D62"),
  text = ~paste(
    "Status:", survival_status,
    "<br>Percentage:", round(percentage, 2), "%"
  ),
  hoverinfo = "text",
  textposition = "auto"
) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Passenger Class"),
    yaxis = list(title = "Percentage"),
    title = "Survival Proportions by Passenger Class",
    legend = list(title = list(text = "Survival Status"))
  )

plot

Create the interactive stacked bar plot

# Age vs. fare scatter with one point colour per `embarked` value. The plot
# object is stored first and then printed by evaluating its name.
ggplot_scatter <- titanic_data %>%
  ggplot(mapping = aes(x = age, y = fare, colour = embarked)) +
  geom_point() +
  labs(
    x = "Age",
    y = "Fare",
    title = "Scatter Plot of Age vs. Fare by Embarkation"
  ) +
  theme_minimal()

ggplot_scatter

Create a scatter plot of Age vs. Fare colored by Embarkation and display the plot

# Keep only the rows whose `embarked` code is "S".
southampton_data <- filter(titanic_data, embarked == "S")

Filter the dataframe for passengers who embarked at Southampton

# Age vs. fare for the Southampton subset, with a linear-model ("lm") trend
# line drawn over the points.
southampton_data %>%
  ggplot(mapping = aes(x = age, y = fare)) +
  geom_point(colour = "red") +
  geom_smooth(method = "lm", colour = "blue") +
  labs(
    x = "Age",
    y = "Fare",
    title = "Scatter Plot of Age vs. Fare for Southampton"
  )
## `geom_smooth()` using formula = 'y ~ x'

Scatter Plot of Age vs. Fare for Southampton Passengers, with a Linear Regression Line