---
title: "Airbnb Bangkok 2022 Visualization"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    mathjax: NULL
    social: menu
    theme:
      version: 4
      bootswatch: minty
    source_code: embed
    css: ./includes/footer.css
    includes:
      after_body: ./includes/footer.html
runtime: flex
---
```{r setup, include=FALSE}
library(flexdashboard)
# Import libraries
library(readr)
library(ggplot2) # build static plots
library(ggpubr) # export static plots
library(scales) # control plot scales
library(glue) # format the tooltip text
library(plotly) # build interactive plots
library(lubridate) # date
library(dplyr) # data manipulation
library(tidyverse) # data manipulation
# library(hrbrthemes)
library(leaflet) # Maps
library(treemap)
library(sunburstR)
library(htmltools)
library(htmlwidgets)
library(DT)
```
```{r}
# Penalise scientific notation heavily so numbers print in fixed notation.
options(scipen = 9999)

# Listings table: read the raw CSV, then drop four columns by position.
# NOTE(review): positions 5, 13, 15 and 18 refer to the raw CSV layout;
# confirm which columns these are before the input file changes shape.
airbnb <- "data_input/listings.csv" %>%
  read.csv(encoding = "latin1") %>%
  as.data.frame() %>%
  select(-c(5, 13, 15, 18))

# Reviews table, read as-is.
review <- read.csv(file = "data_input/reviews_bangkok.csv")
# Defined a function
#' Classify availability values into a demand category.
#'
#' The three bands are contiguous, so every non-missing value lands in
#' exactly one of them:
#'   y <= 146        -> "in demand"
#'   146 < y < 270   -> "on the average"
#'   y >= 270        -> "not really on demand"
#'
#' @param y Numeric scalar or vector of availability values.
#' @return Character vector of the same length as `y`; missing inputs
#'   yield `NA_character_`.
convert_availability <- function(y) {
  category <- rep(NA_character_, length(y))
  # which() drops NA comparisons, so missing inputs simply stay NA.
  category[which(y <= 146)] <- "in demand"
  category[which(y > 146 & y < 270)] <- "on the average"
  category[which(y >= 270)] <- "not really on demand"
  category
}
# Label every listing with its demand category. vapply() pins the result to
# exactly one string per listing, so the column is always a character vector
# (sapply() would silently return a list for empty input).
airbnb$availability_category <- vapply(
  X = airbnb$availability_365,
  FUN = convert_availability,
  FUN.VALUE = character(1)
)

# Put the new label right after its source column, then convert the
# categorical columns to factors.
airbnb <- airbnb %>%
  relocate(availability_category, .after = availability_365) %>%
  mutate(
    neighbourhood = as.factor(neighbourhood),
    room_type = as.factor(room_type),
    availability_category = as.factor(availability_category)
  )

# Missing reviews_per_month values are replaced with 0.
airbnb_clean <- replace_na(airbnb, list(reviews_per_month = 0))
# Reviews dated in 2022: parse the date column and derive the month label
# (full name), the month number (`bulan`) and the year.
review_2022 <- review %>%
  filter(year(ymd(date)) %in% 2022) %>%
  mutate(
    date = as_date(date),
    month = month(date, label = TRUE, abbr = FALSE),
    bulan = month(date, label = FALSE),
    year = year(date)
  )

# Full review table: same date parsing, with month label and year.
review <- review %>%
  mutate(
    date = as_date(date),
    month = month(date, label = TRUE, abbr = FALSE),
    year = year(date)
  )
```
# MAP
Row {data-height=470}
-----------------------------------------------------------------------
### Bangkok Map
```{r}
# One marker per neighbourhood, placed at the coordinates of the first
# listing found for that neighbourhood.
region_unik <- airbnb_clean %>%
  distinct(neighbourhood, .keep_all = TRUE) %>%
  arrange(neighbourhood)

# Listing counts per neighbourhood: overall and per room type. Counting
# inside a single grouped summary gives every neighbourhood a row with an
# explicit 0 for room types it does not have.
total_property_region <- airbnb_clean %>%
  group_by(neighbourhood) %>%
  summarise(
    count = n(),
    entire_home = sum(room_type == "Entire home/apt", na.rm = TRUE),
    private_room = sum(room_type == "Private room", na.rm = TRUE),
    hotel_room = sum(room_type == "Hotel room", na.rm = TRUE),
    shared_room = sum(room_type == "Shared room", na.rm = TRUE)
  ) %>%
  ungroup()

# Marker data: coordinates plus the popup text. The counts are attached by
# joining on the neighbourhood key, so each label always describes the
# neighbourhood its marker belongs to.
loca <- region_unik %>%
  select(neighbourhood, latitude, longitude) %>%
  left_join(total_property_region, by = "neighbourhood") %>%
  mutate(label = glue("
Region:{neighbourhood} |
Total Property: {count} |
Entire home/apt: {entire_home} |
Private room: {private_room} |
Hotel room: {hotel_room} |
Shared room: {shared_room} |")) %>%
  select(latitude, longitude, label)

# Leaflet widget: OpenStreetMap tiles plus a satellite base layer, a layer
# switcher, clustered markers with popups, and a toggleable mini map.
map1 <- leaflet() %>%
  addTiles(group = "Default") %>%
  addProviderTiles(providers$Esri.WorldImagery, group = "Satelite") %>%
  addLayersControl(baseGroups = c("Default", "Satelite")) %>%
  addMarkers(
    data = loca,
    popup = loca$label,
    clusterOptions = markerClusterOptions()
  ) %>%
  addMiniMap(toggleDisplay = TRUE)
map1
```
# TREND
Row {data-height=475 .tabset}
-----------------------------------------------------------------------
### Monthly Trend
```{r}
# Silence dplyr's summarise() grouping messages from here on.
options(dplyr.summarise.inform = FALSE)

# Number of 2022 reviews per month, with the tooltip text for each point.
review_month <- review_2022 %>%
  filter(year %in% 2022) %>%
  group_by(bulan, month) %>%
  count(month) %>%
  mutate(label9 = glue("
Month : {month}
Total : {comma(n)}
"))

# Line + points; `text` is mapped on the points only and is picked up by
# ggplotly() as the tooltip.
plot_1 <- ggplot(data = review_month, mapping = aes(x = bulan, y = n)) +
  geom_line(color = "red") +
  geom_point(mapping = aes(text = label9)) +
  scale_x_continuous(breaks = seq(1, 12, 1)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Total Review Month Trend in Bangkok 2022",
    x = "Month",
    y = "Total"
  ) +
  theme(plot.background = element_rect(fill = "#78C2AD"))

ggplotly(plot_1, tooltip = "text")
```
### Daily Trend
```{r}
# Number of 2022 reviews per calendar day, with the tooltip text per point.
review_month <- review_2022 %>%
  count(bulan, date, month) %>%
  mutate(label2 = glue("
Month : {month}
Date : {date}
Total : {n}
"))

# Daily line + points on a date axis with one abbreviated-month tick per
# month; `text` on the points feeds the ggplotly() tooltip.
plot_1 <- ggplot(data = review_month, mapping = aes(x = date, y = n)) +
  geom_line(color = "orange") +
  geom_point(mapping = aes(text = label2)) +
  scale_x_date(
    breaks = date_breaks("months"),
    labels = date_format("%b")
  ) +
  labs(
    title = "Total Review Day Trend in Bangkok 2022",
    x = "Month",
    y = "Total"
  ) +
  theme(plot.background = element_rect(fill = "#78C2AD"))

ggplotly(plot_1, tooltip = "text")
```
# RANKING
Row {data-height=465 .tabset}
-----------------------------------------------------------------------
### Highest Total Review Ranking Based On Room Type
```{r}
# Sum number_of_reviews_ltm per (room type, neighbourhood), then keep the
# two neighbourhoods with the largest totals within each room type and
# attach the tooltip text.
zz <- airbnb_clean %>%
  group_by(room_type, neighbourhood) %>%
  summarise(Total = sum(number_of_reviews_ltm)) %>%
  arrange(desc(Total)) %>%
  slice(1:2) %>%
  mutate(label4 = glue("Total review: {comma(Total)}
Room type: {room_type}
Region: {neighbourhood}"))

# Fix the room type level order and add an overall rank used as the bar
# grouping.
zz$room_type <- factor(
  zz$room_type,
  levels = c("Entire home/apt", "Hotel room", "Private room", "Shared room")
)
zz$urutan <- rank(x = zz$Total, ties.method = "first")

# Horizontal bars, one facet per neighbourhood.
# NOTE(review): the manual fill scale supplies four colours while fill is
# mapped to neighbourhood — confirm at most four neighbourhoods reach here.
plot3 <- ggplot(
  zz,
  aes(x = Total, y = reorder(room_type, Total), text = label4)
) +
  geom_col(
    aes(fill = neighbourhood, group = urutan),
    color = "black",
    position = "dodge"
  ) +
  scale_x_continuous(labels = comma) +
  scale_fill_manual(
    values = c("#78C2AD", "#98B0A9", "#375F6F", "#364B45")
  ) +
  labs(
    title = "Top Property - Highest Total Review",
    x = "Total Review",
    y = "Room Type",
    fill = "Region"
  ) +
  facet_wrap(facets = "neighbourhood", nrow = 4, scales = "free") +
  theme(plot.background = element_rect(fill = "#78C2AD"))

ggplotly(plot3, tooltip = "text")
```
### Lowest Total Review Ranking Based On Room Type
```{r}
# Sum number_of_reviews_ltm per (room type, neighbourhood), then keep the
# ten neighbourhoods with the smallest totals within each room type and
# attach the tooltip text.
zzl <- airbnb_clean %>%
  group_by(room_type, neighbourhood) %>%
  summarise(Total = sum(number_of_reviews_ltm)) %>%
  arrange(Total) %>%
  slice(1:10) %>%
  mutate(label5 = glue("Total review: {Total}
Room type: {room_type}
Region: {neighbourhood}"))

# Fix the room type level order used for fill colours and facets.
zzl$room_type <- factor(
  zzl$room_type,
  levels = c("Entire home/apt", "Hotel room", "Private room", "Shared room")
)

# Horizontal bars per neighbourhood, one facet per room type.
plot4 <- ggplot(
  zzl,
  aes(x = Total, y = reorder(neighbourhood, Total), text = label5)
) +
  geom_col(
    aes(fill = room_type),
    color = "black",
    position = "dodge"
  ) +
  scale_fill_manual(
    values = c("#78C2AD", "#98B0A9", "#375F6F", "#364B45")
  ) +
  labs(
    title = "Bottom Ranked Property - Lowest Total Review",
    x = "Total Review",
    y = "Region",
    fill = "Room type"
  ) +
  facet_wrap(facets = "room_type", nrow = 4, scales = "free") +
  theme(plot.background = element_rect(fill = "#78C2AD"))

ggplotly(plot4, tooltip = "text")
```
# PROPORTION
Row {data-height=470}
-----------------------------------------------------------------------
### Proportion Chart
```{r}
# Count listings per neighbourhood / room type / availability category and
# reshape to the two columns handed to sunburst(): a "-"-separated path and
# its count.
tot_prop_type <- airbnb_clean %>%
  group_by(neighbourhood, room_type, availability_category) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  mutate(
    path = paste(neighbourhood, room_type, availability_category, sep = "-")
  ) %>%
  dplyr::select(path, count)

# Render the sunburst with its legend enabled.
p <- sunburst(tot_prop_type, legend = TRUE)
p
```
# DISTRIBUTION
Row {data-height=477}
-----------------------------------------------------------------------
### Total Property per Region & Room Type Chart
```{r}
# Circular bar plot: one bar per (neighbourhood, room type) pair, bars
# grouped by room type with a few empty slots separating the groups.
# Create dataset: number of listings per neighbourhood and room type
tot_per_room <- airbnb_clean %>%
group_by(neighbourhood,room_type) %>%
summarise(count = n()) %>%
ungroup()
# Tooltip-style text per row (not referenced by the static plot below)
tot_per_room <- tot_per_room %>%
mutate(tempel = glue(
"Region:{neighbourhood}
Room Type: {room_type}
Total:{count}"
))
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 3
# Build empty_bar all-NA padding rows per room type level, with the same
# columns as the data, and tag each padding row with its room type
to_add <- data.frame( matrix(NA, empty_bar*nlevels(tot_per_room$room_type), ncol(tot_per_room)) )
colnames(to_add) <- colnames(tot_per_room)
to_add$room_type <- rep(levels(tot_per_room$room_type), each=empty_bar)
# Append the padding, sort by room type so each group is contiguous, and
# number the rows; id becomes the bar position around the circle
tot_per_room <- rbind(tot_per_room, to_add)
tot_per_room <- tot_per_room %>% arrange(room_type)
tot_per_room$id <- seq(1, nrow(tot_per_room))
# Get the name and the y position of each label
label_data <- tot_per_room
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar # subtract 0.5 so each label takes the angle of the centre of its bar, not its right (1) or left (0) edge
# Labels whose angle is below -90 are flipped by 180 degrees and
# right-aligned; the rest are left-aligned
label_data$hjust <- ifelse( angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle+180, angle)
# prepare a data frame for base lines: first id of each room type group and
# its last id minus the padding count; title is the midpoint of the two
base_data <- tot_per_room %>%
group_by(room_type) %>%
summarize(start=min(id), end=max(id) - empty_bar) %>%
rowwise() %>%
mutate(title=mean(c(start, end)))
# prepare a data frame for grid (scales)
grid_data <- base_data
# `:` binds tighter than `-`, so 1:nrow(grid_data)-1 is 0..n-1; the 0 index
# selects nothing, leaving c(n, 1, ..., n-1). Each row thus receives the
# previous group's end (the first row receives the last group's), plus 1.
grid_data$end <- grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
# Drop the first row, i.e. the segment paired with the last group's end
grid_data <- grid_data[-1,]
# Make the plot
p <- ggplot(tot_per_room, aes(x=as.factor(id), y=count, fill=room_type)) + # Note that id is a factor. If x is numeric, there is some space between the first bar
geom_bar(aes(x=as.factor(id), y=count, fill=room_type), stat="identity", alpha=0.5) +
# Add reference lines at y = 80/60/40/20 across the gaps between groups. They are added early so the second bar layer is drawn over them.
geom_segment(data=grid_data, aes(x = end, y = 80, xend = start, yend = 80), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 60, xend = start, yend = 60), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 40, xend = start, yend = 40), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
geom_segment(data=grid_data, aes(x = end, y = 20, xend = start, yend = 20), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
# Add text showing the value of each 20/40/60/80 line, placed at the last bar position
annotate("text", x = rep(max(tot_per_room$id),4), y = c(20, 40, 60, 80), label = c("20", "40", "60", "80") , color="grey", size=3 , angle=0, fontface="bold", hjust=1) +
# Second bar layer, drawn on top of the reference lines
geom_bar(aes(x=as.factor(id), y=count, fill=room_type), stat="identity", alpha=0.3) +
# Negative lower limit leaves a hole in the middle of the circle.
# NOTE(review): counts above 120 fall outside this range and would not be
# drawn — confirm the limits and the 20-80 reference lines suit the data.
ylim(-100,120) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1,4), "cm")
) +
coord_polar() +
# Neighbourhood name just above each bar, rotated to its bar's angle
geom_text(data=label_data, aes(x=id, y=count+10, label=neighbourhood, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=2.5, angle= label_data$angle, inherit.aes = FALSE ) +
# Add base line information: a line under each room type group and the room
# type name at the group's midpoint.
# NOTE(review): hjust supplies four values, one per row of base_data —
# assumes exactly four room types; confirm.
geom_segment(data=base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha=0.5, size=0.6 , inherit.aes = FALSE ) +
geom_text(data=base_data, aes(x = title, y = -18, label=room_type), hjust=c(1,1,0,0), colour = "black", alpha=1, size=3, fontface="bold", inherit.aes = FALSE)
p
```
# RAW DATASET
Row {data-height=468}
-----------------------------------------------------------------------
```{r}
# Interactive table of the listings data frame (`airbnb`, i.e. before the
# reviews_per_month NA replacement).
DT::datatable(data = airbnb)
```