MAP

Row

Bangkok Map

TREND

Row

Monthly Trend

Daily Trend

RANKING

Row

Highest Total Review Ranking Based On Room Type

Lowest Total Review Ranking Based On Room Type

PROPORTION

Row

Proportion Chart

Legend

DISTRIBUTION

Row

Total Property per Region & Room Type Chart

RAW DATASET

Row

---
title: "Airbnb Bangkok 2022 Visualization"
output: 
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    mathjax: NULL
    social: menu
    theme: 
      version: 4
      bootswatch: minty
    source_code: embed
    css: ./includes/footer.css
    includes:
      after_body: ./includes/footer.html
    runtime: flex
---


```{r setup, include=FALSE}
library(flexdashboard)

# Import libraries
library(readr) 
library(ggplot2) # membuat plot statis 
library(ggpubr) # export plot statis
library(scales) # mengatur skala plot
library(glue) # setting teks pada tooltip
library(plotly) # membuat plot interaktif
library(lubridate) # date
library(dplyr) # untuk data manipulation
library(tidyverse) # untuk data manipulation
# library(hrbrthemes)
library(leaflet) # Maps
library(treemap)
library(sunburstR)
library(htmltools)
library(htmlwidgets)
library(DT)
```


```{r}
# Print large/small numbers in fixed notation instead of scientific notation
options(scipen = 9999)

# Load the raw inputs ----
# Listings: read with latin1 encoding, then drop the columns at positions
# 5, 13, 15 and 18.
# NOTE(review): the drop is positional, so it depends on the column order of
# listings.csv -- confirm against the file before changing the input.
airbnb <- read.csv(file = "data_input/listings.csv", encoding = "latin1") %>%
  as.data.frame() %>%
  select(-c(5, 13, 15, 18))

# Reviews
review <- read.csv(file = "data_input/reviews_bangkok.csv")
      
      # Defined a function
      
        # Map `availability_365` values onto a demand category.
        #
        # @param y Numeric vector (a single value works too).
        # @return Character vector, same length as `y`:
        #   "in demand"            for y <= 146
        #   "on the average"       for 146 < y < 270
        #   "not really on demand" for y >= 270
        #   NA                     where `y` is NA
        #
        # The middle band starts directly above 146; the previous `y > 147`
        # test left 147 falling through to "not really on demand".
        convert_availability <- function(y) {
          category <- rep("not really on demand", length(y))
          # NA entries in a logical index select nothing in an assignment,
          # so missing inputs are handled explicitly on the last line.
          category[y > 146 & y < 270] <- "on the average"
          category[y <= 146] <- "in demand"
          category[is.na(y)] <- NA_character_
          category
        }
      
      # Derive the availability category for every listing.
      # vapply() (instead of sapply()) guarantees a character vector even
      # when the input is empty, and fails loudly if the helper ever returns
      # something other than a single string.
        airbnb$availability_category <- vapply(
          X = airbnb$availability_365,
          FUN = convert_availability,
          FUN.VALUE = character(1)
        )

      # Put the new column right after its source column and turn the
      # categorical columns into factors.
        airbnb <- airbnb %>%
          relocate(availability_category, .after = availability_365) %>%
          mutate(
            across(
              c(neighbourhood, room_type, availability_category),
              as.factor
            )
          )

      # Missing reviews_per_month values are replaced by 0.
        airbnb_clean <- replace_na(airbnb, list(reviews_per_month = 0))

        
# Reviews ----
# Reviews dated in 2022, with the date parsed and split into
# month name (`month`), month number (`bulan`) and `year`.
review_2022 <- review %>%
  filter(year(ymd(date)) %in% 2022) %>%
  mutate(
    date = as_date(date),
    month = month(date, label = TRUE, abbr = FALSE),
    bulan = month(date, label = FALSE),
    year = year(date)
  )

# Full review table with the same parsed date, month name and year.
review <- review %>%
  mutate(
    date = as_date(date),
    month = month(date, label = TRUE, abbr = FALSE),
    year = year(date)
  )
    
```

# MAP

Row {data-height=470}
-----------------------------------------------------------------------

### Bangkok Map

```{r}
# One row per neighbourhood; its latitude/longitude is where the marker for
# that region is placed. Sorted by neighbourhood so the rows line up with the
# count tables built below.
region_unik <- airbnb_clean %>%
  distinct(neighbourhood, .keep_all = TRUE) %>%
  droplevels() %>%
  arrange(neighbourhood)

# Count listings per neighbourhood, optionally restricted to one room type.
# `.drop = FALSE` keeps every level of the `neighbourhood` factor, so regions
# without a listing of that type get a count of 0 and every table has the
# same rows in the same order.
count_by_region <- function(data, type = NULL) {
  if (!is.null(type)) {
    data <- filter(data, room_type == type)
  }
  count(data, neighbourhood, name = "count", .drop = FALSE)
}

total_property_region <- count_by_region(airbnb_clean)
total_property_region1 <- count_by_region(airbnb_clean, "Entire home/apt")
total_property_region2 <- count_by_region(airbnb_clean, "Private room")
total_property_region3 <- count_by_region(airbnb_clean, "Hotel room")
total_property_region4 <- count_by_region(airbnb_clean, "Shared room")

# The zero-filled Hotel room / Shared room tables keep their original names,
# because the popup template below refers to them.
total_property_region3_new <- total_property_region3
total_property_region4_new <- total_property_region4

# The popup text is assembled row by row, so every count table must list the
# same neighbourhoods in the same order as `region_unik`. Stop instead of
# silently attaching counts to the wrong marker.
region_tables <- list(
  total_property_region,
  total_property_region1,
  total_property_region2,
  total_property_region3_new,
  total_property_region4_new
)
tables_aligned <- vapply(
  region_tables,
  function(tbl) {
    identical(
      as.character(tbl$neighbourhood),
      as.character(region_unik$neighbourhood)
    )
  },
  logical(1)
)
if (!all(tables_aligned)) {
  stop(
    "Per-region count tables are not aligned with region_unik.",
    call. = FALSE
  )
}

# Marker coordinates plus the popup text for each region
loca <- region_unik %>%
  select(latitude, longitude) %>%
  mutate(label=glue("
                        Region:{total_property_region$neighbourhood} |
                        
                        Total Property: {total_property_region$count} |
                        
                        Entire home/apt: {total_property_region1$count} |
                        
                        Private room: {total_property_region2$count} |
                        
                        Hotel room: {total_property_region3_new$count} | 
                        
                        Shared room: {total_property_region4_new$count} |"))

# Leaflet widget: OpenStreetMap tiles by default, Esri imagery as an
# alternative base layer, clustered markers and a toggleable mini map.
map1 <- leaflet() %>%
  addTiles(group = "Default") %>%
  addProviderTiles(providers$Esri.WorldImagery, group = "Satelite") %>%
  addLayersControl(baseGroups = c("Default", "Satelite")) %>%
  addMarkers(
    data = loca,
    popup = loca$label,
    clusterOptions = markerClusterOptions()
  ) %>%
  addMiniMap(toggleDisplay = TRUE)

map1
```


# TREND

Row {data-height=475 .tabset} 
-----------------------------------------------------------------------

### Monthly Trend

```{r}

# Turn off dplyr's informational message about summarise() grouping
options(dplyr.summarise.inform = FALSE)

# Number of 2022 reviews per month, plus the hover text for each point
review_month <- review_2022 %>%
  filter(year %in% 2022) %>%
  group_by(bulan, month) %>%
  count(month) %>%
  mutate(label9 = glue("
                       Month : {month}
                       Total : {comma(n)}
                       "))

# Line chart over month number; only the points carry the tooltip text
plot_1 <- ggplot(data = review_month, mapping = aes(x = bulan, y = n)) +
  geom_line(color = "red") +
  geom_point(aes(text = label9)) +
  scale_x_continuous(breaks = seq(1, 12, 1)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Total Review Month Trend in Bangkok 2022",
    x = "Month",
    y = "Total"
  ) +
  theme(plot.background = element_rect(fill = "#78C2AD"))

ggplotly(plot_1, tooltip = "text")
```


### Daily Trend
```{r}


# Number of 2022 reviews per calendar day, plus the hover text for each point
review_month <- review_2022 %>%
  count(bulan, date, month) %>%
  mutate(label2 = glue("
                       Month : {month}
                       Date : {date}
                       Total : {n}
                       "))

# Line chart over the date axis with one tick per month (abbreviated name)
plot_1 <- ggplot(data = review_month, mapping = aes(x = date, y = n)) +
  geom_line(color = "orange") +
  geom_point(aes(text = label2)) +
  scale_x_date(
    breaks = date_breaks("months"),
    labels = date_format("%b")
  ) +
  labs(
    title = "Total Review Day Trend in Bangkok 2022",
    x = "Month",
    y = "Total"
  ) +
  theme(plot.background = element_rect(fill = "#78C2AD"))

ggplotly(plot_1, tooltip = "text")
```

# RANKING 

Row {data-height=465 .tabset}
-----------------------------------------------------------------------
### Highest Total Review Ranking Based On Room Type

```{r}
# For every room type, the two regions with the largest summed
# `number_of_reviews_ltm`, plus the hover text for each bar.
zz <- airbnb_clean %>%
  group_by(room_type, neighbourhood) %>%
  # keep the result grouped by room_type so slice() works per room type
  summarise(Total = sum(number_of_reviews_ltm), .groups = "drop_last") %>%
  arrange(desc(Total)) %>%
  slice(1:2) %>%
  mutate(label4=glue("Total review: {comma(Total)}
                     Room type: {room_type}
                     Region: {neighbourhood}")) %>%
  ungroup()

zz$room_type <- factor(
  zz$room_type,
  levels = c("Entire home/apt", "Hotel room", "Private room", "Shared room")
)

# Overall rank of each bar (ties broken by row order); used as the dodge group
zz$urutan <- rank(x = zz$Total, ties.method = "first")

# Fill is mapped to the region, and up to eight different regions can appear
# (two per room type). scale_fill_manual() stops when it gets fewer colours
# than regions, so the four theme colours are interpolated when more are
# needed; with four or fewer regions the original colours are used unchanged.
region_palette <- c("#78C2AD", "#98B0A9", "#375F6F", "#364B45")
n_regions <- n_distinct(zz$neighbourhood)
if (n_regions > length(region_palette)) {
  region_palette <- colorRampPalette(region_palette)(n_regions)
}

# One facet per region, horizontal bars per room type
plot3 <- ggplot(zz, aes(x = Total, y = reorder(room_type, Total), text = label4)) +
  geom_col(
    aes(fill = neighbourhood, group = urutan),
    color = "black",
    position = "dodge"
  ) +
  scale_x_continuous(labels = comma) +
  scale_fill_manual(values = region_palette) +
  labs(
    title = "Top Property - Highest Total Review",
    x = "Total Review",
    y = "Room Type",
    fill = "Region"
  ) +
  facet_wrap(facets = "neighbourhood", nrow = 4, scales = "free") +
  theme(plot.background = element_rect(fill = "#78C2AD"))

ggplotly(plot3, tooltip = "text")

```


### Lowest Total Review Ranking Based On Room Type

```{r}
# For every room type, the ten regions with the smallest summed
# `number_of_reviews_ltm`, plus the hover text for each bar.
zzl <- airbnb_clean %>%
  group_by(room_type, neighbourhood) %>%
  summarise(Total = sum(number_of_reviews_ltm), .groups = "drop_last") %>%
  arrange(Total) %>%
  slice(1:10) %>%
  mutate(label5=glue("Total review: {Total}
                     Room type: {room_type}
                     Region: {neighbourhood}"))

zzl$room_type <- factor(
  zzl$room_type,
  levels = c("Entire home/apt", "Hotel room", "Private room", "Shared room")
)

# One facet per room type, horizontal bars per region
plot4 <- ggplot(
  zzl,
  aes(x = Total, y = reorder(neighbourhood, Total), text = label5)
) +
  geom_col(aes(fill = room_type), color = "black", position = "dodge") +
  scale_fill_manual(
    values = c("#78C2AD", "#98B0A9", "#375F6F", "#364B45")
  ) +
  labs(
    title = "Bottom Ranked Property - Lowest Total Review",
    x = "Total Review",
    y = "Region",
    fill = "Room type"
  ) +
  facet_wrap(facets = "room_type", nrow = 4, scales = "free") +
  theme(plot.background = element_rect(fill = "#78C2AD"))

ggplotly(plot4, tooltip = "text")
```
# PROPORTION

Row {data-height=470}
-----------------------------------------------------------------------

### Proportion Chart

```{r}
# Number of listings for every region / room type / availability category
# combination, reshaped into the two columns passed to sunburst():
# a "-"-separated path and its count.
tot_prop_type <- airbnb_clean %>%
  group_by(neighbourhood, room_type, availability_category) %>%
  summarise(count = n(), .groups = "drop") %>%
  transmute(
    path = paste(neighbourhood, room_type, availability_category, sep = "-"),
    count
  )

# Sunburst chart with its legend enabled
p <- sunburst(tot_prop_type, legend = TRUE)
p
```
# DISTRIBUTION

Row {data-height=477}
-----------------------------------------------------------------------

### Total Property per Region & Room Type Chart

```{r}
# Circular (polar) bar chart: one bar per region / room type combination,
# bars grouped by room type with a gap of empty bars after each group.

# Number of listings per region and room type
tot_per_room <- airbnb_clean %>% 
  group_by(neighbourhood,room_type) %>% 
  summarise(count = n()) %>% 
  ungroup()

# Tooltip text per bar (the static plot built below does not reference `tempel`)
tot_per_room <- tot_per_room %>% 
  mutate(tempel = glue(
    "Region:{neighbourhood}
     Room Type: {room_type}
     Total:{count}"
  ))

# Append `empty_bar` all-NA rows per room type level; after sorting by
# room_type they become the gaps that separate the groups on the circle.

empty_bar <- 3
to_add <- data.frame( matrix(NA, empty_bar*nlevels(tot_per_room$room_type), ncol(tot_per_room)) )
colnames(to_add) <- colnames(tot_per_room)

to_add$room_type <- rep(levels(tot_per_room$room_type), each=empty_bar)
tot_per_room <- rbind(tot_per_room, to_add)

# Sort by room type and give every bar (including the empty ones) a running id
tot_per_room <- tot_per_room %>% arrange(room_type)
tot_per_room$id <- seq(1, nrow(tot_per_room))

# Label position data: text angle and horizontal justification for each bar
label_data <- tot_per_room
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar # 0.5 is subtracted so the label takes the angle of the centre of its bar, not of its right (1) or left (0) edge

# Labels whose angle is below -90 are right-aligned and rotated by 180 degrees
# (presumably so text on that half of the circle is not drawn upside down)
label_data$hjust <- ifelse( angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle+180, angle)

# Per room type: first and last real bar id (the trailing empty bars are
# excluded) and the midpoint `title` where the group name is drawn
base_data <- tot_per_room %>% 
  group_by(room_type) %>% 
  summarize(start=min(id), end=max(id) - empty_bar) %>% 
  rowwise() %>% 
  mutate(title=mean(c(start, end)))
 
# Data for the grid-line segments drawn in the gaps between groups.
# `1:nrow(grid_data)-1` evaluates as `(1:n) - 1`, i.e. 0:(n-1); the 0 index
# selects nothing, so `end` is rotated by one position (last value first)
# before 1 is added. The first row is then dropped.
grid_data <- base_data
grid_data$end <- grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1,]

# Build the plot
p <- ggplot(tot_per_room, aes(x=as.factor(id), y=count, fill=room_type)) +   # id is used as a factor; with a numeric x there is some space before the first bar
  
  geom_bar(aes(x=as.factor(id), y=count, fill=room_type), stat="identity", alpha=0.5) +
  
   # Grid lines at y = 80/60/40/20, added before the second bar layer so that layer is drawn over them
  geom_segment(data=grid_data, aes(x = end, y = 80, xend = start, yend = 80), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
  geom_segment(data=grid_data, aes(x = end, y = 60, xend = start, yend = 60), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
  geom_segment(data=grid_data, aes(x = end, y = 40, xend = start, yend = 40), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
  geom_segment(data=grid_data, aes(x = end, y = 20, xend = start, yend = 20), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
  
  
   # Text showing the value of each of the 20/40/60/80 grid lines, placed at the last bar id
  annotate("text", x = rep(max(tot_per_room$id),4), y = c(20, 40, 60, 80), label = c("20", "40", "60", "80") , color="grey", size=3 , angle=0, fontface="bold", hjust=1) +
  
  geom_bar(aes(x=as.factor(id), y=count, fill=room_type), stat="identity", alpha=0.3) +
  ylim(-100,120) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1,4), "cm") 
  ) +
  coord_polar() + 
  # Region name just above each bar, rotated to the bar's angle
  geom_text(data=label_data, aes(x=id, y=count+10, label=neighbourhood, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=2.5, angle= label_data$angle, inherit.aes = FALSE ) +
  
 # Base line under each group and the room type name at its midpoint.
 # NOTE(review): hjust=c(1,1,0,0) supplies exactly four values, so this
 # assumes base_data has four rows (four room types) -- confirm.
  geom_segment(data=base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha=0.5, size=0.6 , inherit.aes = FALSE )  +
  geom_text(data=base_data, aes(x = title, y = -18, label=room_type), hjust=c(1,1,0,0), colour = "black", alpha=1, size=3, fontface="bold", inherit.aes = FALSE)

p
```

# RAW DATASET

Row {data-height=468}
-----------------------------------------------------------------------

```{r}
# Interactive table of `airbnb` (the listings after column removal and factor
# conversion, before the reviews_per_month NA replacement)
DT::datatable(data = airbnb)
```