Data Review

Import Your Data

In the following code chunk, import your data.

#### Use read_csv() or another function

#### Make sure your data is converted into a tibble. 

#### For demonstration purposes, this example uses the Titanic training data (`train.csv`).

# Attach ggplot2 up front: the plots in this script call ggplot(), geom_*(),
# labs(), etc. unqualified, so the package must be on the search path.
library(ggplot2)

# Read the data from train.csv in the working directory.
titanic_data <- read.csv("train.csv")

## Gender
# Tabulate passengers by sex and express each count as a percentage share.
sex_counts <- table(titanic_data$Sex)
sex_df <- setNames(as.data.frame(sex_counts), c("Sex", "Count"))
sex_df$Percentage <- prop.table(sex_df$Count) * 100

# Draw the pie chart: a single full-width stacked column of percentages,
# bent into a circle by mapping y onto the polar angle.
ggplot(data = sex_df, mapping = aes(x = "", y = Percentage, fill = Sex)) +
  geom_col(width = 1, colour = "black") +
  scale_fill_manual(
    values = c("lightblue", "pink"),
    labels = c("Female", "Male")
  ) +
  coord_polar(theta = "y") +
  ggtitle("Gender Distribution on the Titanic") +
  xlab("") +
  ylab("") +
  theme_void() +
  theme(legend.title = element_blank())

## Age
# Plot the density of the passenger age distribution.
# Age contains missing values: the rendered run of this document reported
# "Removed 177 rows containing non-finite outside the scale range
# (`stat_density()`)". na.rm = TRUE makes that removal explicit instead of
# relying on ggplot2's default of dropping the rows with a warning.
ggplot(titanic_data, aes(x = Age)) +
  geom_density(fill = "skyblue", alpha = 0.5, na.rm = TRUE) +
  labs(
    title = "Age Density of Titanic Passengers",
    x = "Age",
    y = "Density"
  ) +
  theme_minimal()

# Bar chart of passenger counts per ticket class; Pclass is converted to a
# factor so each class gets its own discrete bar.
ggplot(data = titanic_data, mapping = aes(x = factor(Pclass))) +
  theme_minimal() +
  geom_bar(position = "dodge", fill = "light yellow", colour = "black") +
  ggtitle("Passenger Class Distribution on the Titanic") +
  xlab("Passenger Class") +
  ylab("Count")

# Histogram of ticket fares using bins that are 10 fare units wide.
ggplot(data = titanic_data, mapping = aes(x = Fare)) +
  theme_minimal() +
  geom_histogram(
    binwidth = 10,
    fill = "skyblue",
    colour = "black",
    alpha = 0.7
  ) +
  ggtitle("Fare Distribution on the Titanic") +
  xlab("Fare") +
  ylab("Count")

# Tabulate passengers by survival status and compute each status's
# percentage share; the 0/1 codes are relabelled as "No"/"Yes".
survival_counts <- table(titanic_data$Survived)
survival_df <- setNames(
  as.data.frame(survival_counts),
  c("Survived", "Count")
)
survival_df$Percentage <- prop.table(survival_df$Count) * 100
survival_df$Survived <- factor(
  survival_df$Survived,
  levels = c(0, 1),
  labels = c("No", "Yes")
)

# Draw the pie chart: one full-width stacked column of percentages wrapped
# into a circle by mapping y onto the polar angle.
ggplot(
  data = survival_df,
  mapping = aes(x = "", y = Percentage, fill = Survived)
) +
  geom_col(width = 1, colour = "black") +
  scale_fill_manual(values = c("red", "green"), labels = c("No", "Yes")) +
  coord_polar(theta = "y") +
  ggtitle("Survival Distribution on the Titanic") +
  xlab("") +
  ylab("") +
  theme_void() +
  theme(legend.title = element_blank())

# Stacked bar chart of passenger counts by sex, with each bar split by
# survival status.
ggplot(
  data = titanic_data,
  mapping = aes(x = Sex, fill = factor(Survived))
) +
  theme_minimal() +
  geom_bar(position = "stack", colour = "black") +
  scale_fill_manual(values = c("red", "green"), labels = c("No", "Yes")) +
  ggtitle("Survival by Gender on the Titanic") +
  xlab("Gender") +
  ylab("Count") +
  labs(fill = "Survived")

# Stacked bar chart of passenger counts per ticket class, with each bar
# split by survival status.
ggplot(
  data = titanic_data,
  mapping = aes(x = factor(Pclass), fill = factor(Survived))
) +
  theme_minimal() +
  geom_bar(position = "stack", colour = "black") +
  scale_fill_manual(values = c("red", "green"), labels = c("No", "Yes")) +
  ggtitle("Survival by Passenger Class on the Titanic") +
  xlab("Passenger Class") +
  ylab("Count") +
  labs(fill = "Survived")

# Scatter plot of age against survival status, coloured by survival status.
# Age contains missing values: the rendered run of this document reported
# "Removed 177 rows containing missing values or values outside the scale
# range (`geom_point()`)". na.rm = TRUE makes that removal explicit instead
# of relying on ggplot2's default of dropping the rows with a warning.
ggplot(titanic_data, aes(x = Age, y = factor(Survived))) +
  geom_point(
    aes(color = factor(Survived)),
    size = 3,
    alpha = 0.7,
    na.rm = TRUE
  ) +
  labs(
    title = "Survival by Age on the Titanic",
    x = "Age",
    y = "Survived",
    color = "Survived"
  ) +
  scale_color_manual(values = c("red", "green"), labels = c("No", "Yes")) +
  theme_minimal()

Data Review

pppcwen

11/10/2020

Import Your Data