# VDE exercise, week 9

## Exercise using the Titanic dataset
library (ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library (dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Import the dataset into a data frame used by every plot below.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this exact file exists — confirm before sharing or relocating.
titanic <- read.csv(
  "C:/Users/Thinkpad/Documents/athaya/college sem 2/vde/week9/titanic2.csv.csv"
)
# 1. Bar chart comparing the number of passengers of each sex.
ggplot(data = titanic, mapping = aes(x = Sex)) +
  geom_bar(fill = "pink") +
  labs(title = "Perbandingan Jenis Kelamin Penumpang Titanic")

# 2. Share of survivors vs. non-survivors within each passenger class.
# position = "fill" stretches every bar to the same height, so the y axis
# carries proportions rather than counts. The axes and legend are labelled
# explicitly because the defaults would read "factor(Pclass)", "count" and
# "factor(Survived)".
ggplot(titanic, aes(x = factor(Pclass), fill = factor(Survived))) +
  geom_bar(position = "fill") +
  labs(
    title = "Proporsi Survive berdasarkan Kelas",
    x = "Pclass",
    y = "Proporsi",
    fill = "Survived"
  )

# 3. Distribution of passenger age as a 20-bin histogram.
# Rows with a missing Age cannot be binned (177 rows in the recorded run).
# na.rm = TRUE drops them deliberately instead of relying on ggplot2's
# remove-with-a-warning default.
ggplot(titanic, aes(x = Age)) +
  geom_histogram(bins = 20, fill = "maroon", na.rm = TRUE) +
  labs(title = "Distribusi Umur Penumpang")

# 4. Box plot of Age grouped by survival status, with outliers drawn in red.
# Rows with a missing Age are dropped explicitly (na.rm = TRUE) rather than
# with a warning. The x axis and legend are labelled so they do not show the
# raw expression "factor(Survived)".
ggplot(titanic, aes(x = factor(Survived), y = Age, fill = factor(Survived))) +
  geom_boxplot(outlier.color = "red", na.rm = TRUE) +
  labs(title = "Boxplot", x = "Survived", fill = "Survived")

# 5. Scatter plot of Fare against Age with a linear-regression trend line.
# Both layers drop rows with a missing Age explicitly (na.rm = TRUE) instead of
# warning. The smoothing formula is spelled out so geom_smooth() does not have
# to report the default it picked.
ggplot(titanic, aes(x = Age, y = Fare)) +
  geom_point(na.rm = TRUE) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue", na.rm = TRUE) +
  labs(title = "Pengaruh Umur terhadap Harga Tiket")