LATIHAN VDE W8

## Latihan Menggunakan Titanic
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Import the dataset.
# read.csv2() defaults to ';' as the field separator and ',' as the decimal
# mark.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
titanic <- read.csv2("C:/Users/HP/Documents/titanic2.csv (1).csv")

Contoh Visualisasi

# 1. Number of Titanic passengers by sex
# geom_bar() counts the rows for each distinct value of Sex and draws one
# green bar per value.
ggplot(data = titanic) +
  geom_bar(mapping = aes(x = Sex), fill = "green") +
  labs(title = "Perbandingan Jenis Kelamin Penumpang Titanic")

# 2. Share of survivors vs. non-survivors within each passenger class
# position = "fill" stacks the bars and rescales every bar to height 1, so the
# y-axis is a proportion, not a count. The explicit labels say so and replace
# the raw factor(...) expressions ggplot2 would otherwise print as the axis
# and legend titles.
ggplot(titanic, aes(x = factor(Pclass), fill = factor(Survived))) +
  geom_bar(position = "fill") +
  labs(
    title = "Proporsi Survival Berdasarkan Kelas",
    x = "Pclass",
    y = "Proporsi",
    fill = "Survived"
  )

# 3. Distribution of passenger age
# Age and Fare are rewritten with '.' as the decimal mark and coerced to
# numeric. Entries that still do not parse as numbers become NA, which is
# what the recorded warning below reports.
titanic[["Age"]] <- as.numeric(
  gsub(",", ".", titanic[["Age"]], fixed = TRUE)
)
titanic[["Fare"]] <- as.numeric(
  gsub(",", ".", titanic[["Fare"]], fixed = TRUE)
)
## Warning: NAs introduced by coercion
# Histogram of Age with 20 bins; rows with a missing Age are dropped by
# stat_bin() with a warning.
ggplot(data = titanic, mapping = aes(x = Age)) +
  geom_histogram(fill = "purple", bins = 20) +
  labs(title = "Distribusi Umur Penumpang")
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_bin()`).

# 4. Box plot of Age for each value of Survived
# Age and Fare were already coerced to numeric in section 3, so the coercion
# is not repeated here (re-running it on numeric columns is a no-op).
# Outliers are drawn in red. The x-axis and legend titles are set explicitly
# so they do not show the raw factor(Survived) expression.
ggplot(titanic, aes(x = factor(Survived), y = Age, fill = factor(Survived))) +
  geom_boxplot(outlier.color = "red") +
  labs(title = "boxplot", x = "Survived", fill = "Survived")
## Warning: Removed 177 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# 5. Scatter plot of Fare against Age with a fitted linear trend
# Age and Fare were already coerced to numeric in section 3, so the coercion
# is not repeated here (re-running it on numeric columns is a no-op).
# geom_smooth(method = "lm") overlays a linear-model fit in blue. No explicit
# formula is given, so ggplot2 reports the default it picks.
ggplot(titanic, aes(x = Age, y = Fare)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "Pengaruh Umur terhadap Harga Tiket")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 197 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 197 rows containing missing values or values outside the scale range
## (`geom_point()`).