Tentang Visualisasi Besaran Data
Merupakan visualisasi yang paling dasar dalam menggambarkan keadaan dari suatu waktu, kondisi, maupun entitas
Memberikan perbandingan antar kategori dan antar waktu
Beberapa diagram yang dapat digunakan untuk visualisasi besaran data adalah sebagai berikut.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggridges)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Read the house-price data (comma-separated, with a header row) directly from
# the course repository, then print the structure of the resulting data frame.
data_house <- read.csv(
  file = "https://raw.githubusercontent.com/gerrydito/Sains-Data-S2/master/Praktikum/Visualisasi%20Data/house_price.csv",
  header = TRUE,
  sep = ","
)
str(data_house)
## 'data.frame': 4600 obs. of 18 variables:
## $ date : chr "2014-05-02 00:00:00" "2014-05-02 00:00:00" "2014-05-02 00:00:00" "2014-05-02 00:00:00" ...
## $ price : num 313000 2384000 342000 420000 550000 ...
## $ bedrooms : num 3 5 3 3 4 2 2 4 3 4 ...
## $ bathrooms : num 1.5 2.5 2 2.25 2.5 1 2 2.5 2.5 2 ...
## $ sqft_living : int 1340 3650 1930 2000 1940 880 1350 2710 2430 1520 ...
## $ sqft_lot : int 7912 9050 11947 8030 10500 6380 2560 35868 88426 6200 ...
## $ floors : num 1.5 2 1 1 1 1 1 2 1 1.5 ...
## $ waterfront : int 0 0 0 0 0 0 0 0 0 0 ...
## $ view : int 0 4 0 0 0 0 0 0 0 0 ...
## $ condition : int 3 5 4 4 4 3 3 3 4 3 ...
## $ sqft_above : int 1340 3370 1930 1000 1140 880 1350 2710 1570 1520 ...
## $ sqft_basement: int 0 280 0 1000 800 0 0 0 860 0 ...
## $ yr_built : int 1955 1921 1966 1963 1976 1938 1976 1989 1985 1945 ...
## $ yr_renovated : int 2005 0 0 0 1992 1994 0 0 0 2010 ...
## $ street : chr "18810 Densmore Ave N" "709 W Blaine St" "26206-26214 143rd Ave SE" "857 170th Pl NE" ...
## $ city : chr "Shoreline" "Seattle" "Kent" "Bellevue" ...
## $ statezip : chr "WA 98133" "WA 98119" "WA 98042" "WA 98008" ...
## $ country : chr "USA" "USA" "USA" "USA" ...
# Horizontal bar chart of the number of rows per city; geom_bar() does the
# counting, so no pre-aggregation is needed.
data_house %>%
  ggplot(mapping = aes(x = city)) +
  geom_bar(fill = "steelblue") +
  coord_flip() +
  ggtitle("Jumlah Rumah per Kota") +
  xlab("Kota") +
  ylab("Jumlah Rumah") +
  theme_classic()
Bar chart yang diurutkan berdasarkan jumlah rumah, dari yang paling besar
# Tally the rows per city, order the cities by that tally, and draw horizontal
# bars; after coord_flip() the largest count ends up at the top.
data_house %>%
  count(city) %>%
  mutate(city = fct_reorder(as.factor(city), n)) %>%
  ggplot(mapping = aes(x = city, y = n)) +
  geom_col(fill = "steelblue", width = 0.4) +
  # Remove the default axis padding so the bars start flush against the axis.
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  labs(title = "Jumlah Rumah Setiap Kota", x = "", y = "Jumlah Rumah") +
  theme_classic() +
  # Keep this after theme_classic(): a complete theme would reset the title.
  theme(plot.title = element_text(hjust = 0.5, size = 30))
Hanya menampilkan Top 10 dan menambahkan label angka pada chart-nya
# Top-10 cities by number of rows, with each count printed inside its bar.
data_house %>%
  count(city) %>%
  # Keep the ten highest counts (slice_max() keeps ties at the cut-off by default).
  slice_max(order_by = n, n = 10) %>%
  mutate(city = fct_reorder(as.factor(city), n)) %>%
  ggplot(mapping = aes(x = city, y = n)) +
  geom_col(fill = "steelblue", width = 0.4) +
  # position_stack(vjust = 0.5) centres the label along the length of the bar.
  geom_text(
    aes(label = n),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 3
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  labs(
    title = "Top 10 Kota dengan Jumlah Rumah Terbanyak",
    x = "",
    y = "Jumlah Rumah"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 15))
# Lollipop chart of the number of rows per city: a segment from zero up to the
# count, capped with a point.
data_house %>%
  count(city) %>%
  # Use arrange(n) to sort in the opposite direction. The row order does not
  # set the axis order; the factor levels from fct_reorder() do.
  arrange(desc(n)) %>%
  mutate(city = fct_reorder(as.factor(city), n)) %>%
  ggplot(mapping = aes(x = city)) +
  geom_segment(aes(xend = city, y = 0, yend = n), color = "green") +
  geom_point(aes(y = n), color = "red", size = 2) +
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  labs(title = "Jumlah Rumah Setiap Kota", x = "", y = "Jumlah Rumah") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))
Before Aggregate
# Lollipop chart of the raw, un-aggregated prices: every row of data_house
# contributes its own segment and point. Cities are ordered with reorder(),
# which summarises price per city with its default function (the mean).
ggplot(data_house,
       mapping = aes(x = reorder(city, price), y = price)) +
  # x is inherited from the plot-level mapping; only the end points are added.
  geom_segment(aes(xend = reorder(city, price), y = 0, yend = price),
               color = "red") +
  # `size` was previously misspelled as `sie`, which ggplot2 ignored with an
  # "unknown parameters" warning, so the points kept their default size.
  geom_point(color = "green", size = 4, alpha = 0.6) +
  coord_flip() +
  theme_classic()
# Column chart built from the raw rows (no aggregation): geom_col() stacks one
# segment per row, while geom_text() prints each row's own price at that
# row's (unstacked) value.
data_house %>%
  ggplot(mapping = aes(x = reorder(city, price), y = price)) +
  geom_col(fill = "steelblue") +
  geom_text(
    mapping = aes(label = price),
    vjust = -0.5,
    color = "black",
    size = 3
  ) +
  coord_flip() +
  labs(
    title = "Agregasi Harga Rumah di Setiap Kota",
    x = "Kota",
    y = "Total Harga Rumah"
  ) +
  theme_classic()
After Aggregate
# Sum the prices per city, keep the ten largest totals, and plot them as
# labelled horizontal bars.
data_house %>%
  group_by(city) %>%
  summarise(total_price = sum(price)) %>%
  arrange(desc(total_price)) %>%
  # Only the top 10 are shown.
  slice_head(n = 10) %>%
  ggplot(mapping = aes(x = reorder(city, total_price), y = total_price)) +
  geom_col(fill = "steelblue") +
  geom_text(
    mapping = aes(label = total_price),
    vjust = -0.5,
    color = "black",
    size = 3
  ) +
  coord_flip() +
  labs(
    title = "Top 10 Kota dengan Total Harga Rumah Tertinggi",
    x = "Kota",
    y = "Total Harga Rumah"
  ) +
  theme_classic()
Digunakan jika ingin mengetahui 2 peubah
Contoh: mengelompokkan berdasarkan waktu dan kotanya:
# Subset used by the grouped/stacked bar charts: two cities, three build years.
data_stacked <- data_house %>%
  filter(
    city %in% c("Seattle", "Bellevue"),
    yr_built %in% c(2014, 2013, 2012)
  )
# Grouped (dodged) bar chart of the total house price per build year and city.
#
# The prices are summed per (yr_built, city) before plotting. With
# stat = "identity" and position = "dodge", rows that share the same year and
# city are drawn on top of each other, so the visible bar would show only the
# largest single price rather than the total promised by the title and axis.
data_stacked %>%
  group_by(yr_built, city) %>%
  summarise(price = sum(price), .groups = "drop") %>%
  ggplot(aes(x = as.factor(yr_built), y = price, fill = as.factor(city))) +
  geom_col(position = "dodge", width = 0.7, color = "white") +
  labs(fill = "Kota") +
  ggtitle("Total Harga Rumah per Kota dan Tahun Pembangunan") +
  xlab("Tahun Pembangunan") +
  ylab("Total Harga Rumah") +
  scale_fill_manual(values = c("Seattle" = "steelblue", "Bellevue" = "darkorange")) +
  theme_minimal()
Digunakan ketika ingin membandingkan sedikit peubah (2-3 peubah)
# Stacked bar chart: each row's price is stacked within its build year and
# coloured by city, so the bar height is the sum of the prices in that year.
data_stacked %>%
  ggplot(
    mapping = aes(
      x = as.factor(yr_built),
      y = price,
      fill = as.factor(city)
    )
  ) +
  # geom_col() is geom_bar() with stat = "identity".
  geom_col() +
  scale_fill_manual(
    values = c("Seattle" = "steelblue", "Bellevue" = "darkorange")
  ) +
  labs(
    title = "Total Harga Rumah per Kota dan Tahun Pembangunan",
    x = "Tahun Pembangunan",
    y = "Total Harga Rumah",
    fill = "Kota"
  ) +
  theme_minimal()
# Load the built-in `quakes` dataset and convert it to a tibble.
data("quakes")
# as.tibble() has been deprecated since tibble 2.0.0; as_tibble() is the
# supported replacement and avoids the lifecycle warning.
quakes <- tibble::as_tibble(quakes)
glimpse(quakes)
## Rows: 1,000
## Columns: 5
## $ lat <dbl> -20.42, -20.62, -26.00, -17.97, -20.42, -19.68, -11.70, -28.1…
## $ long <dbl> 181.62, 181.03, 184.10, 181.66, 181.96, 184.31, 166.10, 181.9…
## $ depth <int> 562, 650, 42, 626, 649, 195, 82, 194, 211, 622, 583, 249, 554…
## $ mag <dbl> 4.8, 4.2, 5.4, 4.1, 4.0, 4.0, 4.8, 4.4, 4.7, 4.3, 4.4, 4.6, 4…
## $ stations <int> 41, 15, 43, 19, 11, 12, 43, 15, 35, 19, 13, 16, 19, 10, 94, 1…
# Preview the first rows of the quakes tibble (head() defaults to six rows).
quakes %>% head()
## # A tibble: 6 × 5
## lat long depth mag stations
## <dbl> <dbl> <int> <dbl> <int>
## 1 -20.4 182. 562 4.8 41
## 2 -20.6 181. 650 4.2 15
## 3 -26 184. 42 5.4 43
## 4 -18.0 182. 626 4.1 19
## 5 -20.4 182. 649 4 11
## 6 -19.7 184. 195 4 12
# Subset of quakes used by the grouped bar chart: three station counts and
# three magnitudes. The station values are given as strings; %in% coerces the
# integer column for the comparison.
data_stacked <- quakes %>%
  filter(
    stations %in% c("11", "12", "15"),
    mag %in% c(4.0, 4.2, 4.8)
  )
# Grouped (dodged) bar chart of the total quake depth per magnitude and
# station count.
#
# Depth is summed per (mag, stations) before plotting. With stat = "identity"
# and position = "dodge", rows that share the same magnitude and station count
# are drawn on top of each other, so the visible bar would show only the
# deepest quake rather than the total named in the title.
data_stacked %>%
  group_by(mag, stations) %>%
  summarise(depth = sum(depth), .groups = "drop") %>%
  ggplot(aes(x = as.factor(mag), y = depth, fill = as.factor(stations))) +
  geom_col(position = "dodge", width = 0.7, color = "white") +
  labs(fill = "Station") +
  ggtitle("Total Kedalaman gempa dan Magnitude") +
  xlab("Magnitude") +
  ylab("Kedalaman Gempa") +
  scale_fill_manual(values = c("11" = "steelblue", "12" = "darkorange","15" = "aquamarine")) +
  theme_minimal()
# Lollipop chart of the ten most frequent `stations` values in quakes.
quakes %>%
  count(stations) %>%
  # Keep the ten highest counts (slice_max() keeps ties at the cut-off by default).
  slice_max(order_by = n, n = 10) %>%
  # Use arrange(n) to sort in the opposite direction. The row order does not
  # set the axis order; the factor levels from fct_reorder() do.
  arrange(desc(n)) %>%
  mutate(stations = fct_reorder(as.factor(stations), n)) %>%
  ggplot(mapping = aes(x = stations)) +
  geom_segment(aes(xend = stations, y = 0, yend = n), color = "coral") +
  geom_point(aes(y = n), color = "red", size = 2) +
  scale_y_continuous(expand = c(0, 0)) +
  coord_flip() +
  labs(
    title = "Jumlah Gempa pada Setiap Stations",
    x = "",
    y = "Intensitas Gempa"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))