## Titanic exercise: exploratory plots of the Titanic passenger data
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Load the Titanic passenger data from a local CSV file into a data frame.
titanic <- read.csv(file = "D:/VDE/titanic2.csv.csv")
# 1. Bar chart comparing the number of passengers of each sex.
ggplot(data = titanic) +
  geom_bar(mapping = aes(x = Sex), fill = "pink1") +
  labs(title = "Perbandingan Jenis Kelamin Penumpang Titanic")

# 2. Survived vs. not survived within each passenger class.
# position = "fill" stretches every bar to the same height, so the classes
# are compared by proportion rather than by raw passenger count.
ggplot(
  data = titanic,
  mapping = aes(x = factor(Pclass), fill = factor(Survived))
) +
  geom_bar(position = "fill") +
  labs(title = "Proporsi Survival berdasarkan kelas")

# 3. Distribution of passenger ages as a 20-bin histogram.
# `color` (not `colors`) is the parameter that sets the bar outline; a
# misspelled name is ignored with a warning and the bars get no white border.
ggplot(titanic, aes(x = Age)) +
  geom_histogram(bins = 20, fill = "blue", color = "white") +
  labs(title = "Distribusi Umur Penumpang")
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_bin()`).

# 4. Box plot of age grouped by survival status, one coloured box per group.
# `outlier.color` draws the outlier points in red; a misspelled parameter
# name is ignored with a warning and outliers keep the default colour.
ggplot(titanic, aes(x = factor(Survived), y = Age, fill = factor(Survived))) +
  geom_boxplot(outlier.color = "red") +
  labs(title = "boxplot")
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# 5. Scatter plot of ticket fare against age, overlaid with a fitted
# linear-model trend line (method = "lm") drawn in blue.
ggplot(data = titanic) +
  aes(x = Age, y = Fare) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "Pengaruh Umur terhadap Harga Tiket")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 177 rows containing missing values or values outside the scale range
## (`geom_point()`).