# Load the Titanic training data. Strings are kept as character vectors so
# that blank text fields stay as "" instead of being converted to factors.
train <- read.csv("~/Downloads/train.csv", stringsAsFactors = FALSE)

# Keep only complete rows: na.omit() drops every row that has an NA in any
# column. All later tables and plots work on this cleaned copy.
df_clean <- na.omit(train)
# Titanic Data Analysis ----
# Print a plain-text table of summary statistics for the cleaned data,
# with values shown to one decimal place.
# NOTE(review): assumes the stargazer package is attached earlier in the
# session (no library(stargazer) call is visible here) -- confirm.
stargazer(
  df_clean,
  title = "Summary Statistics Table for Titanic",
  type = "text",
  digits = 1,
  no.space = TRUE,
  align = TRUE
)
# Summary Statistics Table for Titanic
# ========================================
# Statistic    N  Mean  St. Dev. Min  Max
# ----------------------------------------
# PassengerId 714 448.6  259.1    1   891
# Survived    714  0.4    0.5     0    1
# Pclass      714  2.2    0.8     1    3
# Age         714 29.7    14.5   0.4 80.0
# SibSp       714  0.5    0.9     0    5
# Parch       714  0.4    0.9     0    6
# Fare        714 34.7    52.9   0.0 512.3
# ----------------------------------------
library(ggplot2)

# Boxplot of passenger class grouped by survival status.
# Survived is recoded from 0/1 to "No"/"Yes" so the x-axis is readable.
# Each layer is chained with a trailing `+` after the closing parenthesis;
# a `+` placed before the `)` is a syntax error and the plot is never built.
ggplot(
  df_clean,
  aes(x = factor(Survived, labels = c("No", "Yes")), y = Pclass)
) +
  geom_boxplot(
    fill = "lightblue",
    color = "darkblue",
    outlier.color = "red"
  ) +
  labs(
    x = "Survived",
    y = "Passenger Class",
    title = "Class Distribution by Survival Status"
  ) +
  theme_minimal()
library(ggplot2)

# Histogram of passenger age, split into one panel per survival outcome.
# Layers are chained with a trailing `+` after each closing parenthesis;
# a `+` placed before the `)` is a syntax error.
ggplot(
  df_clean,
  aes(x = Age, fill = factor(Survived, labels = c("No", "Yes")))
) +
  # Bins are 2 units of Age wide; bars are semi-transparent and drawn
  # in place (not stacked).
  geom_histogram(binwidth = 2, alpha = 0.7, position = "identity") +
  # One facet per value of Survived, with the raw 0/1 codes replaced by
  # descriptive strip labels.
  facet_wrap(
    ~ Survived,
    labeller = as_labeller(c(`0` = "Did Not Survive", `1` = "Survived"))
  ) +
  labs(
    title = "Histogram of Age by Survival Status",
    x = "Age",
    y = "Frequency",
    fill = "Survived"
  ) +
  theme_light()