Sains Data adalah program studi baru di ITS yang dibuka pada tahun 2023.
## latihan pake titanic
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Import the dataset into the `titanic` data frame used by every plot below.
# NOTE(review): absolute, machine-specific path with a doubled ".csv.csv"
# extension -- presumably matches the file on the author's disk; confirm
# before running this script on another machine.
titanic <- read.csv("C:/Users/hp/Downloads/titanic2.csv.csv")
# Example visualisations ----
# 1. Number of Titanic passengers by sex.
# Only `x` is mapped; geom_bar() fills in the bar heights itself by counting
# the rows in each category.
ggplot(data = titanic) +
  geom_bar(mapping = aes(x = Sex), fill = "pink") +
  ggtitle("Perbandingan Jenis Kelamin Penumpang Titanic")
# 2. Survived / not-survived split within each passenger class.
# position = "fill" stacks the groups and rescales every bar to the same
# total height, so the bars compare proportions rather than raw counts.
ggplot(data = titanic, mapping = aes(x = factor(Pclass))) +
  geom_bar(mapping = aes(fill = factor(Survived)), position = "fill") +
  ggtitle("Proporsi Survival berdasarkan Kelas")
# 3. Distribution of passenger age.
ggplot(titanic, aes(x = Age)) +
  # `bins` sets how many bars are drawn: fewer bins give wider bars, more bins
  # give narrower ones. Somewhere around 20-30 is a usual choice.
  # `color` is the bar outline. The previous spelling `colors` is not a
  # ggplot2 parameter, so it was ignored with an "unknown parameters" warning
  # and the bars were drawn without the intended white outline.
  geom_histogram(bins = 20, fill = "purple", color = "white") +
  labs(title = "Distribusi Umur Penumpang")
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_bin()`).
# 4. Box plot of passenger age for each value of Survived.
# Boxes are filled per group and outlier points are drawn in red.
ggplot(data = titanic) +
  geom_boxplot(
    mapping = aes(x = factor(Survived), y = Age, fill = factor(Survived)),
    outlier.color = "red"
  ) +
  ggtitle("Boxplot")
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# 5. Scatter plot of ticket fare against passenger age.
# geom_smooth(method = "lm") overlays a fitted linear-model line in blue.
ggplot(data = titanic, mapping = aes(x = Age, y = Fare)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  ggtitle("Pengaruh Umur terhadap Harga Tiket")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 177 rows containing missing values or values outside the scale range
## (`geom_point()`).