# load packages needed
library(dplyr)
library(ggplot2)
library(knitr)
library(AER)
library(viridis)
library(hrbrthemes)
library(xtable)
library(knitr)
library(kableExtra)
library(DT)
library(stargazer)
library(plotly)
# Set the working directory to the project's "R coding" folder.
# NOTE(review): this is a machine-specific absolute path; setwd() stops the
# script on any machine where this folder does not exist.
setwd("C:/Users/Asus/Dropbox/Research Project/covid19_restaurant/R coding")

# Restore the objects stored in df_all2.Rdata into the current environment.
# The rest of this script reads and modifies `df_all2`, which is presumably
# one of the restored objects -- confirm against the .Rdata file.
load(file="C:/Users/Asus/Dropbox/Research Project/covid19_restaurant/data_csv/df_all2.Rdata")

1 Descriptive Analysis

1.1 Outlets

# Quarter label used as the time axis in the plots below:
# (year - 2000) followed by "Q" and the quarter code, e.g. 2017 / 1 -> "17Q1".
df_all2$t_id <- with(df_all2, paste0(STDR_YY_CD - 2000, "Q", STDR_QU_CD))

# Store count (STOR_CO) summed over all rows of each quarter
overall_STOR_CO <- df_all2 %>%
  group_by(t_id) %>%
  summarise(store = sum(STOR_CO))

# One line through the quarterly totals, with a marker per quarter
ggplot(overall_STOR_CO, aes(x = t_id, y = store, group = 1)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Change in Total Stores from 2017Q1 to 2020Q2",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    x = "Time",
    y = "No. of Stores"
  )

### Stores of Food and Beverage (F&B)

# Numeric service code: SVC_IND with every literal "CS" removed
df_all2$svc <- as.numeric(gsub("CS", "", df_all2$SVC_IND, fixed = TRUE))

# Market type as a labelled factor built from TRDAR_SE_CD
# Codes -> labels: A = Local, D = Developing, R = Old, U = Tourist
df_all2$mk_type <- factor(
  df_all2$TRDAR_SE_CD,
  levels = c("A", "D", "R", "U"),
  labels = c("Local", "Developing", "Old", "Tourist")
)


# Food and Beverage (F&B): rows with svc < 200000.
# Store count (STOR_CO) summed per quarter and service type. t_id is part of
# the grouping so the plot can use the same quarter label as the other
# figures; STDR_YY_CD and STDR_QU_CD are kept as columns of the result.
food_STOR_CO <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, STDR_YY_CD, STDR_QU_CD, SVC_DES) %>%
  summarize(Stores = sum(STOR_CO))

# The x axis uses t_id ("17Q1", "17Q2", ...), which sorts chronologically.
# The earlier interaction(STDR_QU_CD, STDR_YY_CD, lex.order = TRUE) ordered
# its levels by quarter first (1.2017, 1.2018, ..., 2.2017, ...), so the
# lines were not drawn in time order.
ggplot(food_STOR_CO, aes(x = t_id, y = Stores, group = SVC_DES)) +
  geom_line(aes(color = SVC_DES)) +
  geom_point(aes(color = SVC_DES)) +
  labs(
    title = "Change in Total Stores of Food/Beverage from 2017Q1 to 2020Q2",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    color = "Type of Food/Beverage"
  ) +
  xlab("Time") +
  ylab("No. of Stores")

# F&B (svc < 200000) store totals per quarter, market type and service type
food_STOR_CO2 <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, mk_type, SVC_DES) %>%
  summarise(Stores = sum(STOR_CO))

# One panel per market type; both axes are free to differ between panels
ggplot(
  food_STOR_CO2,
  aes(x = t_id, y = Stores, group = SVC_DES, color = SVC_DES)
) +
  geom_line() +
  geom_point() +
  facet_wrap(~ mk_type, scales = "free") +
  labs(
    title = "Change in Total Stores of F&B by Market Types",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    color = "Type of Food/Beverage",
    x = "Time",
    y = "No. of Stores"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.5, vjust = 0.5))

# Sectors related to F&B: Supermarket, Convenience, Pre-cooked meal
food_sim_svc <- c(300001, 300002, 300010)

# Store totals of the related sectors per quarter and service type
food_sim_STOR_CO <- df_all2 %>%
  filter(svc %in% food_sim_svc) %>%
  group_by(t_id, SVC_DES) %>%
  summarise(Stores = sum(STOR_CO))

ggplot(
  food_sim_STOR_CO,
  aes(x = t_id, y = Stores, group = SVC_DES, color = SVC_DES)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Change in Total Stores of similar to F&B  from 2017Q1 to 2020Q2",
    color = "Type of Food/Beverage",
    x = "Time",
    y = "No. of Stores"
  )

# Same totals, additionally split by market type
food_sim_STOR_CO2 <- df_all2 %>%
  filter(svc %in% food_sim_svc) %>%
  group_by(t_id, mk_type, SVC_DES) %>%
  summarise(Stores = sum(STOR_CO))

# One panel per market type; only the x axis is free between panels
ggplot(
  food_sim_STOR_CO2,
  aes(x = t_id, y = Stores, group = SVC_DES, color = SVC_DES)
) +
  geom_line() +
  geom_point() +
  facet_wrap(~ mk_type, scales = "free_x") +
  labs(
    title = "Change in Total Stores of similar to F&B by Market Types ",
    color = "Type of Food/Beverage",
    x = "Time",
    y = "No. of Stores"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.5, vjust = 0.5))

Summary of Descriptive Analysis of Stores

Overall, there has been little change in the number of restaurants and similar sectors.

1.2 Sales

# Two key variables:
# 1. THSMON_SELNG_AMT (Amount of Sales)
# 2. THSMON_SELNG_CO  (Number of Sales; Tickets)

# Sales amount summed per quarter (missing values ignored)
total_sale <- df_all2 %>%
  group_by(t_id) %>%
  summarise(sales = sum(THSMON_SELNG_AMT, na.rm = TRUE))

ggplot(total_sale, aes(x = t_id, y = sales, group = 1)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Change in Total Sales from 2017Q1 to 2020Q2",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    x = "Time",
    y = "Sales"
  )

# Sales amount summed per quarter and market type
sale_mkt <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(sales = sum(THSMON_SELNG_AMT, na.rm = TRUE))

# One coloured line per market type
ggplot(sale_mkt, aes(x = t_id, y = sales, group = mk_type, color = mk_type)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Sales by Market Types from 2017Q1 to 2020Q2",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    color = "Type of Markets",
    x = "Time",
    y = "Sales"
  )

### Sales of F&B

# F&B (svc < 200000) sales amount per quarter and service type.
# Note: the summary column is named `Stores` but holds summed sales amounts.
sale_fb <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, SVC_DES) %>%
  summarise(Stores = sum(THSMON_SELNG_AMT, na.rm = TRUE))

ggplot(sale_fb, aes(x = t_id, y = Stores, group = SVC_DES, color = SVC_DES)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Change in Sales of F&B  from 2017Q1 to 2020Q2",
    color = "Type of Food/Beverage",
    x = "Time",
    y = "Sales"
  )

# Same sums, additionally split by market type
sale_fb_mk <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, mk_type, SVC_DES) %>%
  summarise(Stores = sum(THSMON_SELNG_AMT, na.rm = TRUE))

# One panel per market type; only the x axis is free between panels
ggplot(sale_fb_mk, aes(x = t_id, y = Stores, group = SVC_DES, color = SVC_DES)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ mk_type, scales = "free_x") +
  labs(
    title = "Change in Sales of F&B  from 2017Q1 to 2020Q2",
    color = "Type of Food/Beverage",
    x = "Time",
    y = "Sales"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.5, vjust = 0.5))

# Sales per outlet: sales amount divided by store count, row by row.
# Infinite ratios (division by a zero store count) are turned into NA.
df_all2$sales_ind <- with(df_all2, THSMON_SELNG_AMT / STOR_CO)
df_all2$sales_ind <- replace(
  df_all2$sales_ind,
  is.infinite(df_all2$sales_ind),
  NA
)

# Mean of the per-row ratios for each quarter
sales.ind <- df_all2 %>%
  group_by(t_id) %>%
  summarise(Avg_Sales = mean(sales_ind, na.rm = TRUE))

ggplot(sales.ind, aes(x = t_id, y = Avg_Sales, group = 1)) +
  geom_line() +
  labs(
    title = "Average Sales Per Store for All Sectors",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    x = "Time",
    y = "Average Sales Per Store"
  )

# Mean sales per store for each quarter and market type
sale.ind.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(avg_sales = mean(sales_ind, na.rm = TRUE))

ggplot(
  sale.ind.mk,
  aes(x = t_id, y = avg_sales, group = mk_type, color = mk_type)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Sales Per Store by Market Types from 2017Q1 to 2020Q2",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    color = "Type of Markets",
    x = "Time",
    y = "Average Sales Per Store"
  )

# Sales Per Store for Food & Beverage (svc < 200000).
# The filter refers to the `svc` column of the piped data, matching the other
# F&B filters in this script, rather than reaching back to df_all2$svc.
# Requires df_all2$sales_ind (sales amount / store count) to exist already.
sales.fb.ind <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, SVC_DES) %>%
  summarize(Avg_Sales = mean(sales_ind, na.rm = TRUE))

ggplot(sales.fb.ind, aes(y = Avg_Sales, x = t_id, group = SVC_DES)) +
  geom_line(aes(color = SVC_DES)) +
  geom_point(aes(color = SVC_DES)) +
  labs(
    title = "Change in Sales Per Store of F&B  from 2017Q1 to 2020Q2",
    color = "Type of Food/Beverage"
  ) +
  xlab("Time") +
  ylab("Average Sales Per Store")

# Same averages, additionally split by market type
sales.fb.mk.ind <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, mk_type, SVC_DES) %>%
  summarize(Avg_Sales = mean(sales_ind, na.rm = TRUE))

# One panel per market type; only the x axis is free between panels
ggplot(sales.fb.mk.ind, aes(y = Avg_Sales, x = t_id, group = SVC_DES)) +
  geom_line(aes(color = SVC_DES)) +
  geom_point(aes(color = SVC_DES)) +
  labs(
    title = "Change in Sales Per Store of F&B  from 2017Q1 to 2020Q2",
    color = "Type of Food/Beverage"
  ) +
  xlab("Time") +
  ylab("Average Sales Per Store") +
  facet_wrap(~ mk_type, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.5, vjust = 0.5))

1.3 Tickets (Sales Volume: No. of Sales Transactions)

# Number of sales (THSMON_SELNG_CO) summed per quarter
ticket <- df_all2 %>%
  group_by(t_id) %>%
  summarise(tickets = sum(THSMON_SELNG_CO, na.rm = TRUE))

ggplot(ticket, aes(x = t_id, y = tickets, group = 1)) +
  geom_line() +
  labs(
    title = "Total No. of Tickets (Numbers of Sales)",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    x = "Time",
    y = "Total No. of Tickets"
  )

## Tickets of F&B

# Number of sales of F&B (svc < 200000) per quarter and service type
ticket.fb <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, SVC_DES) %>%
  summarise(tickets = sum(THSMON_SELNG_CO, na.rm = TRUE))

ggplot(ticket.fb, aes(x = t_id, y = tickets, group = SVC_DES, color = SVC_DES)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Total Tickets of F&B  from 2017Q1 to 2020Q2",
    color = "Type of Food/Beverage",
    x = "Time",
    y = "Total Tickets"
  )

# Same sums, additionally split by market type
ticket.fb.mk <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, mk_type, SVC_DES) %>%
  summarise(tickets = sum(THSMON_SELNG_CO, na.rm = TRUE))

# One panel per market type; only the x axis is free between panels
ggplot(
  ticket.fb.mk,
  aes(x = t_id, y = tickets, group = SVC_DES, color = SVC_DES)
) +
  geom_line() +
  geom_point() +
  facet_wrap(~ mk_type, scales = "free_x") +
  labs(
    title = "Total Tickets of F&B  from 2017Q1 to 2020Q2",
    color = "Type of Food/Beverage",
    x = "Time",
    y = "Total Tickets"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.5, vjust = 0.5))

1.3.1 Tickets Per Store

# Tickets per store: number of sales divided by store count, row by row.
# Infinite ratios (division by a zero store count) are turned into NA.
df_all2$ticket.ind <- with(df_all2, THSMON_SELNG_CO / STOR_CO)
df_all2$ticket.ind <- replace(
  df_all2$ticket.ind,
  is.infinite(df_all2$ticket.ind),
  NA
)

# Mean of the per-row ratios for each quarter
tickets.ind <- df_all2 %>%
  group_by(t_id) %>%
  summarise(tickets_ind = mean(ticket.ind, na.rm = TRUE))

ggplot(tickets.ind, aes(x = t_id, y = tickets_ind, group = 1)) +
  geom_line() +
  labs(
    title = "Tickets Per Stores",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    x = "Time",
    y = "Tickets Per Stores"
  )

# Mean tickets per store for each quarter and market type
tickets.ind.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(tickets_ind = mean(ticket.ind, na.rm = TRUE))

ggplot(
  tickets.ind.mk,
  aes(x = t_id, y = tickets_ind, group = mk_type, color = mk_type)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Tickets Per Stores",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    color = "Type of Markets",
    x = "Time",
    y = "Tickets Per Stores"
  )

1.3.2 Tickets Per Store for F&B

# Mean tickets per store of F&B (svc < 200000) per quarter and service type.
# Uses df_all2$ticket.ind, which must already exist.
tickets.fb <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, SVC_DES) %>%
  summarise(avg_ticket = mean(ticket.ind, na.rm = TRUE))

ggplot(
  tickets.fb,
  aes(x = t_id, y = avg_ticket, group = SVC_DES, color = SVC_DES)
) +
  geom_line() +
  geom_point()

# Same averages, additionally split by market type
tickets.fb.mk <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, mk_type, SVC_DES) %>%
  summarise(avg_ticket = mean(ticket.ind, na.rm = TRUE))

# One panel per market type; only the x axis is free between panels
ggplot(
  tickets.fb.mk,
  aes(x = t_id, y = avg_ticket, group = SVC_DES, color = SVC_DES)
) +
  geom_line() +
  geom_point() +
  facet_wrap(~ mk_type, scales = "free_x") +
  labs(
    title = "Average Tickets Per Store of F&B",
    x = "Time",
    y = "Average Tickets"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.5, vjust = 0.5))

1.4 Sale Per Ticket at Store Level

# Sales per ticket, further divided by the store count, row by row.
# Infinite results (division by zero) are turned into NA.
df_all2$sales.tic <- with(
  df_all2,
  (THSMON_SELNG_AMT / THSMON_SELNG_CO) / STOR_CO
)
df_all2$sales.tic <- replace(
  df_all2$sales.tic,
  is.infinite(df_all2$sales.tic),
  NA
)

# Mean of the per-row values for each quarter
sales.tic <- df_all2 %>%
  group_by(t_id) %>%
  summarise(avg_sales_tic = mean(sales.tic, na.rm = TRUE))

ggplot(sales.tic, aes(x = t_id, y = avg_sales_tic, group = 1)) +
  geom_line() +
  labs(
    title = "Average Sales Per Ticket (Per Ticket and Per Store)",
    caption = "(Starting 2019Q1, the number of types of small business has increase from 45 to 100.)",
    x = "Time",
    y = "Average Sales Per Ticket"
  )

# Mean sales per ticket for each quarter and market type
sales.tic.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(avg_sales_tic = mean(sales.tic, na.rm = TRUE))

ggplot(
  sales.tic.mk,
  aes(x = t_id, y = avg_sales_tic, group = mk_type, color = mk_type)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Average Sales Per Ticket (Per Ticket and Per Store)",
    x = "Time",
    y = "Average Sales Per Ticket"
  )

1.4.1 Sales Per Ticket for F&B at Store Level

# Mean sales per ticket (per store) of F&B (svc < 200000) per quarter and
# service type. Uses df_all2$sales.tic, which must already exist.
sales.tic.fb <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, SVC_DES) %>%
  summarise(avg_ticket = mean(sales.tic, na.rm = TRUE))

ggplot(
  sales.tic.fb,
  aes(x = t_id, y = avg_ticket, group = SVC_DES, color = SVC_DES)
) +
  geom_line() +
  geom_point()

# Same averages, additionally split by market type
sales.tic.fb.mk <- df_all2 %>%
  filter(svc < 200000) %>%
  group_by(t_id, mk_type, SVC_DES) %>%
  summarise(avg_ticket = mean(sales.tic, na.rm = TRUE))

# One panel per market type; only the x axis is free between panels
ggplot(
  sales.tic.fb.mk,
  aes(x = t_id, y = avg_ticket, group = SVC_DES, color = SVC_DES)
) +
  geom_line() +
  geom_point() +
  facet_wrap(~ mk_type, scales = "free_x") +
  labs(
    title = "Sales Per Tickets of F&B",
    color = "Type of F&B",
    x = "Time",
    y = "Sales Per Tickets"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0.5, vjust = 0.5))

1.5 Income

# MT_AVRG_INCOME_AMT: Monthly
# Data exists only for "local" markets
# Mean of the income column for each quarter (missing values ignored)
income <- df_all2 %>%
  group_by(t_id) %>%
  summarise(income = mean(MT_AVRG_INCOME_AMT, na.rm = TRUE))

ggplot(income, aes(x = t_id, y = income, group = 1)) +
  geom_line() +
  labs(
    title = "Income (only for 'Local' Market)",
    caption = "(Income: the 70th Percentile of reported income for the national health insurance.)",
    x = "Time",
    y = "Income"
  )

1.6 Floating Population

# Mean of TOT_FLPOP_CO for each quarter (missing values ignored)
fpop <- df_all2 %>%
  group_by(t_id) %>%
  summarise(fpop = mean(TOT_FLPOP_CO, na.rm = TRUE))

# Interactive (plotly) version of the quarterly line
ggplotly(
  ggplot(fpop, aes(x = t_id, y = fpop)) +
    geom_line(aes(group = 1))
)

# Mean of TOT_FLPOP_CO for each quarter and market type
fpop.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(fpop = mean(TOT_FLPOP_CO, na.rm = TRUE))

# Interactive plot with one coloured line per market type
ggplotly(
  ggplot(fpop.mk, aes(x = t_id, y = fpop, group = mk_type)) +
    geom_line(aes(color = mk_type)) +
    geom_point(aes(color = mk_type)) +
    labs(title = "Floating Population", color = "Type of Markets") +
    xlab("Time") +
    ylab("Floating Population") +
    theme(axis.text.x = element_text(angle = 70, hjust = 0.5, vjust = 0.5))
)
library(tidyr)

# Names of the age-band floating-population columns averaged below
fld.fpop.age <- c('AGRDE_10_FLPOP_CO', 'AGRDE_20_FLPOP_CO', 'AGRDE_30_FLPOP_CO', 'AGRDE_40_FLPOP_CO','AGRDE_50_FLPOP_CO', 'AGRDE_60_ABOVE_FLPOP_CO')

# Mean of each age-band column per quarter and market type (one column per band)
fpop.age <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarize(
    fpop.10 = mean(AGRDE_10_FLPOP_CO, na.rm = TRUE),
    fpop.20 = mean(AGRDE_20_FLPOP_CO, na.rm = TRUE),
    fpop.30 = mean(AGRDE_30_FLPOP_CO, na.rm = TRUE),
    fpop.40 = mean(AGRDE_40_FLPOP_CO, na.rm = TRUE),
    fpop.50 = mean(AGRDE_50_FLPOP_CO, na.rm = TRUE),
    fpop.60 = mean(AGRDE_60_ABOVE_FLPOP_CO, na.rm = TRUE)
  )

# Reshape to long form (one row per quarter, market type and age band) so the
# age band can be mapped to colour, then draw one panel per market type.
fpop.age %>%
  pivot_longer(
    cols = c(fpop.10, fpop.20, fpop.30, fpop.40, fpop.50, fpop.60),
    names_to = "Age",
    values_to = "fpop0"
  ) %>%
  ggplot(aes(x = t_id, y = fpop0, group = Age)) +
  geom_line(aes(color = Age)) +
  geom_point(aes(color = Age)) +
  facet_wrap(~ mk_type) +
  labs(
    title = "Floating Population",
    x = "\n Time",
    y = "Floating Population \n"
  ) +
  theme(
    axis.text.x = element_text(angle = 70, hjust = 0, vjust = 0),
    axis.title.x = element_text(vjust = 1),  # X axis title
    axis.title.y = element_text(vjust = 1)   # Y axis title
  ) +
  # Replace the raw column names in the legend with readable age labels
  scale_color_discrete(
    name = "Age",
    breaks = c('fpop.10', 'fpop.20', 'fpop.30',
               "fpop.40", "fpop.50", "fpop.60"),
    labels = c("10s", "20s", "30s", "40s", "50s", "60s and \n Above")
  )

1.7 Population

# Population
# TOT_REPOP_CO
# TOT_HSHLD_CO

# Mean of TOT_REPOP_CO for each quarter
pop <- df_all2 %>%
  group_by(t_id) %>%
  summarise(pop = mean(TOT_REPOP_CO, na.rm = TRUE))

ggplot(pop, aes(x = t_id, y = pop, group = 1)) +
  geom_line()

# Mean of TOT_REPOP_CO for each quarter and market type
pop.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(pop = mean(TOT_REPOP_CO, na.rm = TRUE))

ggplot(pop.mk, aes(x = t_id, y = pop, group = mk_type, color = mk_type)) +
  geom_line() +
  labs(
    title = "Residence Population",
    x = "Time",
    y = "Average Population",
    color = "Type of Markets"
  )

# Household

# Mean of TOT_HSHLD_CO for each quarter
house <- df_all2 %>%
  group_by(t_id) %>%
  summarise(house = mean(TOT_HSHLD_CO, na.rm = TRUE))

ggplot(house, aes(x = t_id, y = house, group = 1)) +
  geom_line()

# Mean of TOT_HSHLD_CO for each quarter and market type
house.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(house = mean(TOT_HSHLD_CO, na.rm = TRUE))

ggplot(house.mk, aes(x = t_id, y = house, group = mk_type, color = mk_type)) +
  geom_line() +
  labs(
    title = "Household",
    x = "Time",
    y = "Average No. of House holders",
    color = "Type of Markets"
  )

1.8 Working Population

# TOT_WRC_POPLTN_CO

# Mean of TOT_WRC_POPLTN_CO for each quarter
wpop <- df_all2 %>%
  group_by(t_id) %>%
  summarise(wpop = mean(TOT_WRC_POPLTN_CO, na.rm = TRUE))

ggplot(wpop, aes(x = t_id, y = wpop, group = 1)) +
  geom_line() +
  labs(
    title = "Working Population from 2017Q1 to 2020Q2",
    x = "Time",
    y = "Average Population"
  )

# Mean of TOT_WRC_POPLTN_CO for each quarter and market type
wpop.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(wpop = mean(TOT_WRC_POPLTN_CO, na.rm = TRUE))

ggplot(wpop.mk, aes(x = t_id, y = wpop, group = mk_type, color = mk_type)) +
  geom_line() +
  labs(
    title = "Working Population",
    x = "Time",
    y = "Average Population",
    color = "Type of Markets"
  )

1.9 Apartment (need to be updated)

# APT_HSMP_CO : No. of Apartment Complex
# APT_HSHLD_CO : No. of Households in Apartment Complexes
# AVRG_AE : Average Apartment Area
# AVRG_MKTC: Average Apartment Price

# Mean of APT_HSMP_CO for each quarter
apt.complex <- df_all2 %>%
  group_by(t_id) %>%
  summarise(house = mean(APT_HSMP_CO, na.rm = TRUE))

ggplot(apt.complex, aes(x = t_id, y = house, group = 1)) +
  geom_line() +
  labs(
    title = "Apartment Complex from 2017Q1 to 2020Q2",
    x = "Time",
    y = "No. of Apartment Complexes"
  )

# Mean of APT_HSMP_CO for each quarter and market type
apt.complex.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(house = mean(APT_HSMP_CO, na.rm = TRUE))

ggplot(
  apt.complex.mk,
  aes(x = t_id, y = house, group = mk_type, color = mk_type)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Apartment Complex from 2017Q1 to 2020Q2",
    x = "Time",
    y = "No. of Apartment Complexes",
    color = "Type of Markets"
  )

# AVRG_AE
# Mean of AVRG_AE for each quarter
apt.area <- df_all2 %>%
  group_by(t_id) %>%
  summarise(area = mean(AVRG_AE, na.rm = TRUE))

ggplot(apt.area, aes(x = t_id, y = area, group = 1)) +
  geom_line() +
  labs(
    title = "Apartment Area from 2017Q1 to 2020Q2",
    x = "Time",
    y = "Average Apartment Area(Square Meter)"
  )

# Mean of AVRG_AE for each quarter and market type
apt.area.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(area = mean(AVRG_AE, na.rm = TRUE))

ggplot(
  apt.area.mk,
  aes(x = t_id, y = area, group = mk_type, color = mk_type)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Apartment Area from 2017Q1 to 2020Q2",
    x = "Time",
    y = "Average Apartment Area(Square Meter)",
    color = "Type of Markets"
  )

# AVRG_MKTC
# Mean of AVRG_MKTC for each quarter (missing values ignored)
apt.price <- df_all2 %>%
  group_by(t_id) %>%
  summarize(price = mean(AVRG_MKTC, na.rm = TRUE))

# Title names the plotted quantity (price); it previously repeated the
# "Apartment Area" title of the area plots.
ggplot(apt.price, aes(x = t_id, y = price)) +
  geom_line(aes(group = 1)) +
  labs(
    title = "Apartment Price from 2017Q1 to 2020Q2",
    x = "Time",
    y = "Average Apartment Price"
  )

# Mean of AVRG_MKTC for each quarter and market type
apt.price.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarize(price = mean(AVRG_MKTC, na.rm = TRUE))

ggplot(apt.price.mk, aes(x = t_id, y = price, group = mk_type)) +
  geom_line(aes(color = mk_type)) +
  geom_point(aes(color = mk_type)) +
  labs(
    title = "Apartment Price from 2017Q1 to 2020Q2",
    x = "Time",
    y = "Average Apartment Price",
    color = "Type of Markets"
  )

# APT_HSHLD_CO : No. of Households in Apartment Complexes (Need to check)
# apt.hh<- df_all2 %>% group_by(t_id) %>%  summarize(hh=mean(APT_HSHLD_CO, na.rm=TRUE))
#         
# ggplot(apt.hh, aes(x=t_id, y=hh)) + geom_line(aes(group=1)) + 
#   labs(title = "Apartment Complex from 2017Q1 to 2020Q2", 
#        x="Time", y="No. of Apartment Households")
# 
# 
# apt.hh.mk<- df_all2 %>% group_by(t_id, mk_type) %>%  
#                   summarize(hh=mean(APT_HSHLD_CO, na.rm=TRUE))
#         
# ggplot(apt.hh.mk, aes(x=t_id, y=hh, group=mk_type)) + 
#   geom_line(aes(color = mk_type)) + geom_point(aes(color=mk_type)) +
#   labs(title = "Apartment Complex from 2017Q1 to 2020Q2", 
#        x="Time", y="No. of Apartment Complexes")

1.10 Facility

# VIATR_FCLTY_CO
# Mean of VIATR_FCLTY_CO for each quarter
total.fac <- df_all2 %>%
  group_by(t_id) %>%
  summarise(fac = mean(VIATR_FCLTY_CO, na.rm = TRUE))

ggplot(total.fac, aes(x = t_id, y = fac, group = 1)) +
  geom_line() +
  labs(
    title = "Facility from 2017Q1 to 2020Q2",
    x = "Time",
    y = "No. of Facilities"
  )

# Mean of VIATR_FCLTY_CO for each quarter and market type
total.fac.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarise(fac = mean(VIATR_FCLTY_CO, na.rm = TRUE))

ggplot(
  total.fac.mk,
  aes(x = t_id, y = fac, group = mk_type, color = mk_type)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Facility from 2017Q1 to 2020Q2",
    x = "Time",
    y = "No. of Facilities",
    color = "Type of Markets"
  )

# Type of Facilities

# Mean of each facility-count column per quarter (missing values ignored).
# The summary column names become the legend entries of the plot below.
# NOTE(review): `hospital` is computed from GEHSPT_CO and `clinic` from
# GNRL_HSPTL_CO -- confirm these labels against the data dictionary.
# NOTE(review): `parmacy` mirrors the spelling of PARMACY_CO and is shown
# as-is in the legend.
fac <- df_all2 %>%
  group_by(t_id) %>%
  summarize(
    public = mean(PBLOFC_CO, na.rm = TRUE),
    bank = mean(BANK_CO, na.rm = TRUE),
    hospital = mean(GEHSPT_CO, na.rm = TRUE),
    clinic = mean(GNRL_HSPTL_CO, na.rm = TRUE),
    parmacy = mean(PARMACY_CO, na.rm = TRUE),
    kindergarten = mean(KNDRGR_CO, na.rm = TRUE),
    elementary = mean(ELESCH_CO, na.rm = TRUE),
    junior.high = mean(MSKUL_CO, na.rm = TRUE),
    high.school = mean(HGSCHL_CO, na.rm = TRUE),
    univ = mean(UNIV_CO, na.rm = TRUE),
    department.store = mean(DRTS_CO, na.rm = TRUE),
    supermarket = mean(SUPMK_CO, na.rm = TRUE),
    movie = mean(THEAT_CO, na.rm = TRUE),
    lodging = mean(STAYNG_FCLTY_CO, na.rm = TRUE),
    airport = mean(ARPRT_CO, na.rm = TRUE),
    rail = mean(RLROAD_STATN_CO, na.rm = TRUE),
    bus.terminal = mean(BUS_TRMINL_CO, na.rm = TRUE),
    subway = mean(SUBWAY_STATN_CO, na.rm = TRUE),
    bus = mean(BUS_STTN_CO, na.rm = TRUE)
  )

# Reshape every column except t_id to long form (one row per quarter and
# facility type) so the facility type can be mapped to colour.
fac %>%
  pivot_longer(
    cols = -t_id,
    names_to = "fac_type",
    values_to = "fac_no"
  ) %>%
  ggplot(aes(x = t_id, y = fac_no, group = fac_type)) +
  geom_line(aes(color = fac_type)) +
  geom_point(aes(color = fac_type)) +
  labs(
    title = "Type of Facilities ",
    x = "Time",
    y = "No. of Facilities",
    color = "Type of Facilities"
  ) +
  # Two legend columns keep the 19 entries from running off the plot
  guides(color = guide_legend(ncol = 2)) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0, vjust = 0))

# fac by market type

# Mean of each facility-count column per quarter and market type (missing
# values ignored). The summary column names become the legend entries below.
# NOTE(review): `hospital` is computed from GEHSPT_CO and `clinic` from
# GNRL_HSPTL_CO -- confirm these labels against the data dictionary.
fac.mk <- df_all2 %>%
  group_by(t_id, mk_type) %>%
  summarize(
    public = mean(PBLOFC_CO, na.rm = TRUE),
    bank = mean(BANK_CO, na.rm = TRUE),
    hospital = mean(GEHSPT_CO, na.rm = TRUE),
    clinic = mean(GNRL_HSPTL_CO, na.rm = TRUE),
    parmacy = mean(PARMACY_CO, na.rm = TRUE),
    kindergarten = mean(KNDRGR_CO, na.rm = TRUE),
    elementary = mean(ELESCH_CO, na.rm = TRUE),
    junior.high = mean(MSKUL_CO, na.rm = TRUE),
    high.school = mean(HGSCHL_CO, na.rm = TRUE),
    univ = mean(UNIV_CO, na.rm = TRUE),
    department.store = mean(DRTS_CO, na.rm = TRUE),
    supermarket = mean(SUPMK_CO, na.rm = TRUE),
    movie = mean(THEAT_CO, na.rm = TRUE),
    lodging = mean(STAYNG_FCLTY_CO, na.rm = TRUE),
    airport = mean(ARPRT_CO, na.rm = TRUE),
    rail = mean(RLROAD_STATN_CO, na.rm = TRUE),
    bus.terminal = mean(BUS_TRMINL_CO, na.rm = TRUE),
    subway = mean(SUBWAY_STATN_CO, na.rm = TRUE),
    bus = mean(BUS_STTN_CO, na.rm = TRUE)
  )

# Reshape every column except the two keys to long form (one row per quarter,
# market type and facility type), then draw one panel per market type.
fac.mk %>%
  pivot_longer(
    cols = -c(t_id, mk_type),
    names_to = "fac_type",
    values_to = "fac_no"
  ) %>%
  ggplot(aes(x = t_id, y = fac_no, group = fac_type)) +
  geom_line(aes(color = fac_type)) +
  geom_point(aes(color = fac_type)) +
  labs(
    title = "Type of Facilities ",
    x = "Time",
    y = "No. of Facilities",
    color = "Type of Facilities"
  ) +
  # Two legend columns keep the 19 entries from running off the plot
  guides(color = guide_legend(ncol = 2)) +
  theme(axis.text.x = element_text(angle = 70, hjust = 0, vjust = 0)) +
  facet_wrap(~ mk_type)