Import data
# Read the passenger CSV into `data`, the data frame every plot below uses.
# NOTE(review): the doubled ".csv.csv" extension looks accidental — confirm the
# file name on disk before changing it.
data <- read.csv("titanic2.csv.csv")
DISTRIBUSI USIA PENUMPANG
# Histogram of Age in 20 bins, filled by Sex with the bars dodged side by side.
# labs() is chained with `+` so the title is attached to the plot; evaluated as
# a separate statement it only prints a "labels" object and the plot stays
# untitled.
ggplot(data, aes(x = Age, fill = Sex)) +
  geom_histogram(bins = 20, position = "dodge") +
  scale_fill_manual(values = c("female" = "pink", "male" = "cyan")) +
  labs(title = "Distribusi Usia Penumpang")
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_bin()`).
JENIS KELAMIN PENUMPANG
# Bar chart of row counts per Sex, with each bar's count drawn just above it.
# after_stat(count) replaces the `..count..` dot-dot notation, which was
# deprecated in ggplot2 3.4.0.
ggplot(data, aes(x = Sex, fill = Sex)) +
  geom_bar() +
  scale_fill_manual(values = c("male" = "cyan", "female" = "pink")) +
  labs(title = "JENIS KELAMIN PENUMPANG") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5
  )

PROPORSI PENUMPANG SELAMAT SETIAP PASSENGER CLASS
# Dodged bar chart of row counts per Pclass, split by Survived (0 = grey,
# 1 = green), with each bar's count drawn above it.
# after_stat(count) replaces the deprecated `..count..` dot-dot notation.
ggplot(data, aes(x = factor(Pclass), fill = factor(Survived))) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c("0" = "grey", "1" = "green")) +
  labs(title = "Proporsi Survival Setiap PClass") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    # Dodge the labels as well so they line up with the dodged bars.
    position = position_dodge(width = 0.9),
    vjust = -0.2
  )

RELASI UMUR TERHADAP HARGA TIKET
# Scatter plot of Fare against Age (blue points) overlaid with a red
# linear-model ("lm") smoothing line.
ggplot(data = data) +
  aes(x = Age, y = Fare) +
  geom_point(colour = "blue") +
  geom_smooth(colour = "red", method = "lm") +
  labs(title = "Pengaruh Usia Terhadap Harga Tiket")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 177 rows containing missing values or values outside the scale range
## (`geom_point()`).
