LATIHAN VDE W8

Sains Data adalah program studi baru di ITS yang dibuka pada tahun 2023.

## latihan pake titanic
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Load the Titanic passenger data from a CSV file in the working directory.
# NOTE(review): the doubled ".csv.csv" extension looks accidental -- confirm
# the actual file name on disk before renaming anything.
titanic <- read.csv("titanic2.csv.csv")

#contoh visualisasi

# 1. Bar chart comparing the number of passengers of each sex.
ggplot(data = titanic, mapping = aes(x = Sex)) +
  geom_bar(fill = "pink") +
  ggtitle("Perbandingan Jenis Kelamin Penumpang Titanic")

# 2. Share of each Survived value within every passenger class.
# position = "fill" stretches every bar to a total height of 1, so the y axis
# is a proportion, not a count. The axis and legend labels are set explicitly
# so the plot does not show the default "count" and "factor(...)" text.
ggplot(titanic, aes(x = factor(Pclass), fill = factor(Survived))) +
  geom_bar(position = "fill") +
  labs(
    title = "Proporsi Survival berdasarkan kelas",
    x = "Kelas",
    y = "Proporsi",
    fill = "Survived"
  )

# 3. Distribution of passenger age.
# A bin count of roughly 20-30 is the usual choice (original author's note).
# Age has missing values (177 rows in the recorded run); na.rm = TRUE drops
# them deliberately instead of letting stat_bin() warn on every render.
ggplot(titanic, aes(x = Age)) +
  geom_histogram(bins = 30, fill = "purple", na.rm = TRUE) +
  labs(title = "Distribusi Umur Penumpang")

# 4. Box plot of age grouped by the Survived column; outliers drawn in red.
# Rows with a missing Age (177 in the recorded run) are dropped deliberately
# via na.rm = TRUE so stat_boxplot() no longer warns about them.
ggplot(titanic, aes(x = factor(Survived), y = Age, fill = factor(Survived))) +
  geom_boxplot(outlier.color = "red", na.rm = TRUE) +
  labs(title = "boxplot")

# 5. Scatter plot of ticket fare against age with a fitted straight line.
# method = "lm" overlays a linear-model fit; the formula is spelled out so
# geom_smooth() does not print its "using formula = 'y ~ x'" message.
# na.rm = TRUE on both layers drops rows with missing values (177 in the
# recorded run) deliberately instead of warning once per layer.
ggplot(titanic, aes(x = Age, y = Fare)) +
  geom_point(na.rm = TRUE) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue", na.rm = TRUE) +
  labs(title = "Pengaruh umur terhadap harga tiket")