TUGAS DATA TRAIN

#wrangle
library('dplyr') #data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library('tidyr') # data tidying / reshaping
library('readr') # data input
library('stringr') # string manipulation
library('forcats') # factor manipulation
library('modelr') # modelling helper functions
library('ggplot2') # data visualization
# Load the training data from the project folder and preview the first rows.
# NOTE(review): the working directory is a hard-coded, machine-specific path;
# the script only runs as-is where this folder exists.
setwd("C:/Users/kub/OneDrive/Documents/VDE")
train <- read.csv("datatrain.csv")
head(train, n = 6L)
##   PassengerId Survived Pclass
## 1           1        0      3
## 2           2        1      1
## 3           3        1      3
## 4           4        1      1
## 5           5        0      3
## 6           6        0      3
##                                                  Name    Sex Age SibSp Parch
## 1                             Braund, Mr. Owen Harris   male  22     1     0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38     1     0
## 3                              Heikkinen, Miss. Laina female  26     0     0
## 4        Futrelle, Mrs. Jacques Heath (Lily May Peel) female  35     1     0
## 5                            Allen, Mr. William Henry   male  35     0     0
## 6                                    Moran, Mr. James   male  NA     0     0
##             Ticket    Fare Cabin Embarked
## 1        A/5 21171  7.2500              S
## 2         PC 17599 71.2833   C85        C
## 3 STON/O2. 3101282  7.9250              S
## 4           113803 53.1000  C123        S
## 5           373450  8.0500              S
## 6           330877  8.4583              Q
# Store Survived as a factor so the plots below treat it as a discrete
# grouping variable, then preview the data again.
train[["Survived"]] <- factor(train[["Survived"]])
head(train, n = 6L)
##   PassengerId Survived Pclass
## 1           1        0      3
## 2           2        1      1
## 3           3        1      3
## 4           4        1      1
## 5           5        0      3
## 6           6        0      3
##                                                  Name    Sex Age SibSp Parch
## 1                             Braund, Mr. Owen Harris   male  22     1     0
## 2 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38     1     0
## 3                              Heikkinen, Miss. Laina female  26     0     0
## 4        Futrelle, Mrs. Jacques Heath (Lily May Peel) female  35     1     0
## 5                            Allen, Mr. William Henry   male  35     0     0
## 6                                    Moran, Mr. James   male  NA     0     0
##             Ticket    Fare Cabin Embarked
## 1        A/5 21171  7.2500              S
## 2         PC 17599 71.2833   C85        C
## 3 STON/O2. 3101282  7.9250              S
## 4           113803 53.1000  C123        S
## 5           373450  8.0500              S
## 6           330877  8.4583              Q
# Column-by-column summary of train; Survived is already a factor at this
# point, so it is reported as counts per level.
summary(train)
##   PassengerId    Survived     Pclass          Name               Sex           
##  Min.   :  1.0   0:549    Min.   :1.000   Length:891         Length:891        
##  1st Qu.:223.5   1:342    1st Qu.:2.000   Class :character   Class :character  
##  Median :446.0            Median :3.000   Mode  :character   Mode  :character  
##  Mean   :446.0            Mean   :2.309                                        
##  3rd Qu.:668.5            3rd Qu.:3.000                                        
##  Max.   :891.0            Max.   :3.000                                        
##                                                                                
##       Age            SibSp           Parch           Ticket         
##  Min.   : 0.42   Min.   :0.000   Min.   :0.0000   Length:891        
##  1st Qu.:20.12   1st Qu.:0.000   1st Qu.:0.0000   Class :character  
##  Median :28.00   Median :0.000   Median :0.0000   Mode  :character  
##  Mean   :29.70   Mean   :0.523   Mean   :0.3816                     
##  3rd Qu.:38.00   3rd Qu.:1.000   3rd Qu.:0.0000                     
##  Max.   :80.00   Max.   :8.000   Max.   :6.0000                     
##  NA's   :177                                                        
##       Fare           Cabin             Embarked        
##  Min.   :  0.00   Length:891         Length:891        
##  1st Qu.:  7.91   Class :character   Class :character  
##  Median : 14.45   Mode  :character   Mode  :character  
##  Mean   : 32.20                                        
##  3rd Qu.: 31.00                                        
##  Max.   :512.33                                        
## 
# Passenger counts per Pclass value, their share of the total in percent,
# and a ready-made text label for each pie slice.
pclass_counts <- train %>%
  group_by(Pclass) %>%
  summarise(count = n()) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  mutate(label = paste0(Pclass, " (", round(percentage, 1), "%)"))

# Fill colours keyed by Pclass level
custom_colors <- c("1" = "#4D869C", "2" = "#B3E8E5", "3" = "#F2F7A1")

# Pie chart: one stacked column of percentages wrapped around the y axis,
# with each label centred inside its slice.
p_pie_pclass <- ggplot(
  pclass_counts,
  aes(x = "", y = percentage, fill = as.factor(Pclass))
) +
  geom_col(width = 1) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5)) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = custom_colors) +
  labs(fill = "Pclass") +
  theme_void() +
  theme(legend.position = "right")

p_pie_pclass

# Passenger counts per Sex value, their share of the total in percent,
# and a text label for each pie slice.
sex_counts <- train %>%
  group_by(Sex) %>%
  summarise(count = n()) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  mutate(label = paste0(Sex, " (", round(percentage, 1), "%)"))

# Two-colour palette; unnamed, so colours are assigned in level order.
# This replaces any earlier custom_colors and is reused by the plots below.
custom_colors <- c("#4D869C", "#F2F7A1")

# Pie chart: one stacked column of percentages wrapped around the y axis.
p_pie_sex <- ggplot(
  sex_counts,
  aes(x = "", y = percentage, fill = as.factor(Sex))
) +
  geom_col(width = 1) +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5)) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = custom_colors) +
  labs(fill = "Sex") +
  theme_void() +
  theme(legend.position = "right")

p_pie_sex

# Side-by-side bars: number of passengers per SibSp value, split by Survived.
p_sibsp_bar <- ggplot(train, aes(x = factor(SibSp), fill = Survived)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = custom_colors) +
  xlab("Number of Siblings/Spouses (SibSp)") +
  ylab("Count") +
  ggtitle("Bar Plot of SibSp by Survival Status") +
  theme_minimal() +
  theme(legend.position = "right")

p_sibsp_bar

# Fare distribution per Pclass value, one box per Survived level.
p_fare_boxplot <- ggplot(
  train,
  aes(x = as.factor(Pclass), y = Fare, fill = Survived)
) +
  geom_boxplot() +
  scale_fill_manual(values = custom_colors) +
  xlab("Pclass") +
  ylab("Fare") +
  ggtitle("Boxplot of Fare by Class and Survival Status") +
  theme_minimal() +
  theme(legend.position = "right")

p_fare_boxplot

# Side-by-side bars: number of passengers per Embarked value, split by
# Survived.
p_embarked_bar <- ggplot(train, aes(x = Embarked, fill = Survived)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = custom_colors) +
  xlab("Embarked") +
  ylab("Count") +
  ggtitle("Bar Plot of Embarked Locations by Survival Status") +
  theme_minimal() +
  theme(legend.position = "right")

p_embarked_bar

# Two-colour palette used for the Survived levels in the plots that follow.
custom_colors <- c("#4D869C", "#F2F7A1")

# Frequency polygon of Age in 1-unit bins, one line per Survived level.
p_age <- ggplot(train, aes(x = Age, colour = Survived)) +
  geom_freqpoly(binwidth = 1) +
  scale_colour_manual(values = custom_colors) +
  theme(legend.position = "right")

# Proportion of each Survived level within every Sex value
# (position "fill" scales each bar to a total height of 1).
p_sex <- ggplot(train, aes(x = Sex, fill = Survived)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = custom_colors) +
  xlab("Sex") +
  theme(legend.position = "right")

# Proportion of each Survived level within every Pclass value
# (position "fill" scales each bar to a total height of 1).
# The bar outline (colour) is mapped to Survived as well, so it needs the
# same manual palette as the fill; without a colour scale the outlines fall
# back to ggplot2's default hues and clash with custom_colors.
p_class <- ggplot(
  train,
  aes(x = Pclass, fill = Survived, colour = Survived)
) +
  geom_bar(stat = "count", position = "fill") +
  labs(x = "Pclass") +
  scale_fill_manual(values = custom_colors) +
  scale_colour_manual(values = custom_colors) +
  theme(legend.position = "none")

# Proportion of each Survived level within every Embarked value;
# legend hidden.
p_emb <- ggplot(train, aes(x = Embarked, fill = Survived)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = custom_colors) +
  xlab("Embarked") +
  theme(legend.position = "none")

# Proportion of each Survived level within every SibSp value; legend hidden.
p_sib <- ggplot(train, aes(x = SibSp, fill = Survived)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = custom_colors) +
  xlab("SibSp") +
  theme(legend.position = "none")

# Proportion of each Survived level within every Parch value; legend hidden.
p_par <- ggplot(train, aes(x = Parch, fill = Survived)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = custom_colors) +
  xlab("Parch") +
  theme(legend.position = "none")

# Frequency polygon of Fare on a log10 x axis, one line per Survived level;
# legend hidden.
# A fare of 0 has no finite log10 value. Such rows used to reach the stat as
# -Inf and were dropped with only a warning; they are now excluded up front
# so the omission is deliberate and visible in the code.
# binwidth is applied after the scale transformation, i.e. 0.05 is a width
# in log10(Fare) units.
p_fare <- ggplot(
  dplyr::filter(train, Fare > 0),
  aes(x = Fare, colour = Survived)
) +
  geom_freqpoly(binwidth = 0.05) +
  scale_x_log10() +
  scale_colour_manual(values = custom_colors) +
  theme(legend.position = "none")

# Display the survival plots built above; naming a ggplot object at top
# level prints (renders) it.
# NOTE(review): the repeated "Fig. 2" lines are not valid R. They look like
# figure captions carried over from rendered output - confirm, then remove
# or comment them out before sourcing this file.
p_age
Fig. 2

Fig. 2

p_sex
Fig. 2

Fig. 2

p_fare
Fig. 2

Fig. 2

p_class
Fig. 2

Fig. 2

p_emb
Fig. 2

Fig. 2

p_sib
Fig. 2

Fig. 2

p_par
Fig. 2

Fig. 2