# Load the passenger records from the CSV file in the working directory
titanic_data <- read_csv(file = "titanic_data.csv")
## Rows: 891 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Name, Sex, Ticket, Cabin, Embarked
## dbl (7): PassengerId, Survived, Pclass, Age, SibSp, Parch, Fare
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Treat sex, passenger class and survival status as categorical variables so
# they can be used for grouping and as discrete plot aesthetics
titanic_data <- titanic_data %>%
  mutate(across(c(Sex, Pclass, Survived), as.factor))
# Mean fare per sex; missing fares are ignored
fare_by_sex <- titanic_data %>%
  group_by(Sex) %>%
  summarise(
    Average_Fare = mean(Fare, na.rm = TRUE)
  )

# Mean fare per passenger class; missing fares are ignored
fare_by_pclass <- titanic_data %>%
  group_by(Pclass) %>%
  summarise(
    Average_Fare = mean(Fare, na.rm = TRUE)
  )

# Display both fare summaries
print(fare_by_sex)
## # A tibble: 2 × 2
## Sex Average_Fare
## <fct> <dbl>
## 1 female 44.5
## 2 male 25.5
print(fare_by_pclass)
## # A tibble: 3 × 2
## Pclass Average_Fare
## <fct> <dbl>
## 1 1 84.2
## 2 2 20.7
## 3 3 13.7
# Survival rate per sex. Survived is a factor at this point, so it is turned
# back into its numeric value (via character, to get the label rather than the
# internal level code) before averaging.
survival_by_sex <- titanic_data %>%
  mutate(survived_flag = as.numeric(as.character(Survived))) %>%
  group_by(Sex) %>%
  summarise(Survival_Rate = mean(survived_flag, na.rm = TRUE))

# Survival rate per passenger class, using the same factor-to-numeric step
survival_by_pclass <- titanic_data %>%
  mutate(survived_flag = as.numeric(as.character(Survived))) %>%
  group_by(Pclass) %>%
  summarise(Survival_Rate = mean(survived_flag, na.rm = TRUE))

# Display both survival summaries
print(survival_by_sex)
## # A tibble: 2 × 2
## Sex Survival_Rate
## <fct> <dbl>
## 1 female 0.742
## 2 male 0.189
print(survival_by_pclass)
## # A tibble: 3 × 2
## Pclass Survival_Rate
## <fct> <dbl>
## 1 1 0.630
## 2 2 0.473
## 3 3 0.242
# Fare spread for each sex, drawn as one box per group
ggplot(data = titanic_data, mapping = aes(x = Sex, y = Fare)) +
  geom_boxplot() +
  labs(title = "Fare Distribution by Sex")

# Fare spread for each passenger class, drawn as one box per group
ggplot(data = titanic_data, mapping = aes(x = Pclass, y = Fare)) +
  geom_boxplot() +
  labs(title = "Fare Distribution by Passenger Class")
# Stacked bars showing, for each sex, the share of passengers in each Survived
# category. position = "fill" rescales every bar to a total height of 1, so the
# y axis is a proportion; it is labelled explicitly because geom_bar() would
# otherwise keep its default "count" label.
ggplot(titanic_data, aes(x = Sex, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(title = "Survival Rate by Sex", y = "Proportion of passengers")

# Same chart broken down by passenger class instead of sex
ggplot(titanic_data, aes(x = Pclass, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(title = "Survival Rate by Passenger Class", y = "Proportion of passengers")
The analysis of the Titanic data shows that women had a much higher survival rate than men (about 74% versus 19%). This is consistent with the historical “women and children first” policy followed during the disaster. Survival also rose with passenger class: about 63% of first-class passengers survived, compared with 47% in second class and 24% in third class, likely due to better access to lifeboats and proximity to the deck. The fare analysis shows that first-class passengers paid a far higher average fare (84.2) than second-class (20.7) or third-class (13.7) passengers, reflecting the economic disparity on the ship, and that women paid a higher average fare than men (44.5 versus 25.5).
# Start from the built-in mtcars dataset and recode two numeric columns as
# factors. `am` is coded 0 = automatic, 1 = manual (see ?mtcars); the levels
# are listed explicitly so each label is tied to its code rather than to the
# sort order of whichever values happen to be present.
mtcars_data <- mtcars %>%
  mutate(
    am = factor(am, levels = c(0, 1), labels = c("Automatic", "Manual")),
    cyl = factor(cyl)
  )
# Mean miles per gallon for each transmission type
mpg_by_am <- mtcars_data %>%
  group_by(am) %>%
  summarise(
    Average_MPG = mean(mpg, na.rm = TRUE)
  )

# Mean miles per gallon for each cylinder count
mpg_by_cyl <- mtcars_data %>%
  group_by(cyl) %>%
  summarise(
    Average_MPG = mean(mpg, na.rm = TRUE)
  )

# Display both MPG summaries
print(mpg_by_am)
## # A tibble: 2 × 2
## am Average_MPG
## <fct> <dbl>
## 1 Automatic 17.1
## 2 Manual 24.4
print(mpg_by_cyl)
## # A tibble: 3 × 2
## cyl Average_MPG
## <fct> <dbl>
## 1 4 26.7
## 2 6 19.7
## 3 8 15.1
# MPG spread for each transmission type, drawn as one box per group
ggplot(data = mtcars_data, mapping = aes(x = am, y = mpg)) +
  geom_boxplot() +
  labs(title = "Fuel Efficiency by Transmission Type")

# MPG spread for each cylinder count, drawn as one box per group
ggplot(data = mtcars_data, mapping = aes(x = cyl, y = mpg)) +
  geom_boxplot() +
  labs(title = "Fuel Efficiency by Cylinder Count")
The analysis of the mtcars dataset shows that cars with manual
transmissions have higher average fuel efficiency than cars with
automatic transmissions (24.4 versus 17.1 MPG). Cars with fewer
cylinders are also more fuel-efficient on average: 4-cylinder cars
average 26.7 MPG, compared with 19.7 MPG for 6-cylinder and 15.1 MPG
for 8-cylinder cars. This aligns with the general understanding that
smaller engines consume less fuel, making them a more economical choice
for drivers prioritizing fuel efficiency. Note that these are unadjusted
group means, so they do not separate the effect of transmission type
from that of engine size.