R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Load the Titanic passenger data from CSV into `titanic`.
# NOTE(review): the path is absolute and machine-specific (and the file name
# carries a doubled ".csv.csv" extension), so this presumably only knits on
# the original author's machine -- confirm before sharing the document.
titanic <- read.csv(file = "C:/Users/asus/Downloads/titanic2.csv.csv")

# Count the rows of the dataset; the result is a one-column data frame `n`.
titanic %>%
  tally()
##     n
## 1 891
# Preview 10 randomly chosen passengers, limited to the first nine columns.
# `slice_sample()` replaces the superseded `sample_n()`. The draw is random,
# so the rows shown change on every knit unless a seed is set beforehand.
titanic %>%
  select(1:9) %>%
  slice_sample(n = 10)
##    PassengerId Survived Pclass
## 1          212        1      2
## 2          780        1      1
## 3          745        1      3
## 4          140        0      1
## 5          119        0      1
## 6          456        1      3
## 7          364        0      3
## 8            2        1      1
## 9          723        0      2
## 10         334        0      3
##                                                     Name    Sex Age SibSp Parch
## 1                             Cameron, Miss. Clear Annie female  35     0     0
## 2  Robert, Mrs. Edward Scott (Elisabeth Walton McMillan) female  43     0     1
## 3                                     Stranden, Mr. Juho   male  31     0     0
## 4                                     Giglio, Mr. Victor   male  24     0     0
## 5                               Baxter, Mr. Quigg Edmond   male  24     0     1
## 6                                     Jalsevac, Mr. Ivan   male  29     0     0
## 7                                        Asim, Mr. Adola   male  35     0     0
## 8    Cumings, Mrs. John Bradley (Florence Briggs Thayer) female  38     1     0
## 9                           Gillespie, Mr. William Henry   male  34     0     0
## 10                       Vander Planke, Mr. Leo Edmondus   male  16     2     0
##                Ticket
## 1        F.C.C. 13528
## 2               24160
## 3   STON/O 2. 3101288
## 4            PC 17593
## 5            PC 17558
## 6              349240
## 7  SOTON/O.Q. 3101310
## 8            PC 17599
## 9               12233
## 10             345764
# Structural overview: one line per column showing its type and first values.
titanic %>%
  glimpse()
## Rows: 891
## Columns: 12
## $ PassengerId <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ Survived    <int> 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1…
## $ Pclass      <int> 3, 1, 3, 1, 3, 3, 1, 3, 3, 2, 3, 1, 3, 3, 3, 2, 3, 2, 3, 3…
## $ Name        <chr> "Braund, Mr. Owen Harris", "Cumings, Mrs. John Bradley (Fl…
## $ Sex         <chr> "male", "female", "female", "female", "male", "male", "mal…
## $ Age         <dbl> 22, 38, 26, 35, 35, NA, 54, 2, 27, 14, 4, 58, 20, 39, 14, …
## $ SibSp       <int> 1, 1, 0, 1, 0, 0, 0, 3, 0, 1, 1, 0, 0, 1, 0, 0, 4, 0, 1, 0…
## $ Parch       <int> 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 1, 0, 0, 5, 0, 0, 1, 0, 0, 0…
## $ Ticket      <chr> "A/5 21171", "PC 17599", "STON/O2. 3101282", "113803", "37…
## $ Fare        <dbl> 7.2500, 71.2833, 7.9250, 53.1000, 8.0500, 8.4583, 51.8625,…
## $ Cabin       <chr> "", "C85", "", "C123", "", "", "E46", "", "", "", "G6", "C…
## $ Embarked    <chr> "S", "C", "S", "S", "S", "Q", "S", "S", "S", "C", "S", "S"…
# Base-R view of the same structure (object class, column types, lead values).
titanic %>%
  str()
## 'data.frame':    891 obs. of  12 variables:
##  $ PassengerId: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Survived   : int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Pclass     : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Name       : chr  "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
##  $ Sex        : chr  "male" "female" "female" "female" ...
##  $ Age        : num  22 38 26 35 35 NA 54 2 27 14 ...
##  $ SibSp      : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch      : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Ticket     : chr  "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
##  $ Fare       : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Cabin      : chr  "" "C85" "" "C123" ...
##  $ Embarked   : chr  "S" "C" "S" "S" ...
# Per-column summary: quartiles/mean for numeric columns (plus NA counts),
# length/class/mode for character columns.
titanic %>%
  summary()
##   PassengerId       Survived          Pclass          Name          
##  Min.   :  1.0   Min.   :0.0000   Min.   :1.000   Length:891        
##  1st Qu.:223.5   1st Qu.:0.0000   1st Qu.:2.000   Class :character  
##  Median :446.0   Median :0.0000   Median :3.000   Mode  :character  
##  Mean   :446.0   Mean   :0.3838   Mean   :2.309                     
##  3rd Qu.:668.5   3rd Qu.:1.0000   3rd Qu.:3.000                     
##  Max.   :891.0   Max.   :1.0000   Max.   :3.000                     
##                                                                     
##      Sex                 Age            SibSp           Parch       
##  Length:891         Min.   : 0.42   Min.   :0.000   Min.   :0.0000  
##  Class :character   1st Qu.:20.12   1st Qu.:0.000   1st Qu.:0.0000  
##  Mode  :character   Median :28.00   Median :0.000   Median :0.0000  
##                     Mean   :29.70   Mean   :0.523   Mean   :0.3816  
##                     3rd Qu.:38.00   3rd Qu.:1.000   3rd Qu.:0.0000  
##                     Max.   :80.00   Max.   :8.000   Max.   :6.0000  
##                     NA's   :177                                     
##     Ticket               Fare           Cabin             Embarked        
##  Length:891         Min.   :  0.00   Length:891         Length:891        
##  Class :character   1st Qu.:  7.91   Class :character   Class :character  
##  Mode  :character   Median : 14.45   Mode  :character   Mode  :character  
##                     Mean   : 32.20                                        
##                     3rd Qu.: 31.00                                        
##                     Max.   :512.33                                        
## 
# Exploratory Data Analysis

# 1. Gender comparison: number of passengers for each value of `Sex`.
ggplot(data = titanic, mapping = aes(x = Sex)) +
  geom_bar(fill = "pink") +
  labs(title = "Perbandingan Jenis Kelamin di Titanic")

# 2. Survived vs. not survived, broken down by passenger class.
# `position = "fill"` stacks each class's bar to a total height of 1, so the
# y-axis is a proportion rather than a count. The default "count" axis label
# and the raw `factor(...)` axis/legend titles are therefore overridden in
# `labs()` so the chart is not mislabelled.
ggplot(titanic, aes(x = factor(Pclass), fill = factor(Survived))) +
  geom_bar(position = "fill") +
  labs(
    title = "Proporsi Survival berdasarkan kelas",
    x = "Pclass",
    y = "Proporsi",
    fill = "Survived"
  )

# 3. Passenger age distribution, drawn as a 20-bin histogram.
# Rows with a missing `Age` are dropped by ggplot2, which emits a warning.
ggplot(data = titanic, mapping = aes(x = Age)) +
  geom_histogram(bins = 20, fill = "purple") +
  labs(title = "Distribusi umur penumpang")
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_bin()`).

# 4. Boxplot of age, one box per survival status (also used as fill colour).
ggplot(
  data = titanic,
  mapping = aes(x = factor(Survived), y = Age, fill = factor(Survived))
) +
  geom_boxplot() +
  labs(title = "Boxplot")
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# 5. Scatterplot of fare against age, overlaid with a linear-model trend line.
ggplot(data = titanic, mapping = aes(x = Age, y = Fare)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "Pengaruh umur terhadap harga tiket")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 177 rows containing missing values or values outside the scale range
## (`geom_point()`).