1 Data Processing

#---- Convert the Date column to POSIXct (input parsed with "%B %d, %Y")
Saledata$Date <- as.POSIXct(Saledata$Date, format = "%B %d, %Y")
#---- Add the day of week as a factor ordered from Monday to Sunday
# weekdays() is applied to the POSIXct values directly so the day name is
# taken in the time zone the dates were parsed in. Going through as.Date()
# first converts via UTC by default, which moves local-midnight timestamps to
# the previous calendar day in time zones ahead of UTC.
Saledata$Day <- factor(
  weekdays(Saledata$Date),
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
    "Sunday"
  )
)
# Sort the rows by day of week (Monday first)
Saledata <- Saledata[order(Saledata$Day), ]
#--- Check the first and last date recorded for each Store.ID
library(dplyr)
# df1: every row of Saledata plus its store's min/max date
# (the result stays grouped by Store.ID)
df1 <- Saledata %>%
  group_by(Store.ID) %>%
  mutate(
    minDia = min(Date, na.rm = TRUE),
    maxDia = max(Date, na.rm = TRUE)
  )

# df2: one row per store. df1 already carries minDia/maxDia, so they are
# reused here instead of being computed a second time.
df2 <- df1 %>%
  ungroup() %>%
  distinct(Store.ID, minDia, maxDia)

# Number of rows per Period value
table(Saledata[["Period"]])
## 
## June 05 - June 14 June 19 - June 28   May 13 - May 17   May 20 - May 24 
##              2709              3351              1386              1491
# Store.ID is held as character from here on
Saledata[["Store.ID"]] <- as.character(Saledata[["Store.ID"]])

2 Overview

2.1 Traffic Volume

# Totals per store type and hour of the day
WHEN_PeakTime <- Saledata %>%
  group_by(Store.Type, Hour) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  )
# Totals per store type and day of the week
Day_WHEN_PeakTime <- Saledata %>%
  group_by(Store.Type, Day) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  )

2.2 Basket size

# Correlation between the number of transactions and the average basket size
with(Saledata, cor(Transaction, Avg.Basket.Size)) # weak positive correlation
## [1] 0.2636961
# Histogram of the average basket size for each store type
Saledata1 <- Saledata %>% filter(Store.Type == "SHOPPING_MALL")
hist(Saledata1$Avg.Basket.Size)

Saledata2 <- Saledata %>% filter(Store.Type == "INDEPENDENT")
hist(Saledata2$Avg.Basket.Size)

# Side-by-side histograms on a shared x range.
# par() returns the previous settings; they are restored afterwards so the
# 1 x 2 layout and margins do not leak into later base-graphics plots.
old_par <- par(
  mfrow = c(1, 2),
  mar = c(4, 4, 1, 0)
)
hist(
  Saledata1$Avg.Basket.Size,
  breaks = 30, xlim = c(0, 30), col = rgb(1, 0, 0, 0.5),
  xlab = "Avg.BasketSize", ylab = "Frequency", main = ""
)
hist(
  Saledata2$Avg.Basket.Size,
  breaks = 30, xlim = c(0, 30), col = rgb(0, 0, 1, 0.5),
  xlab = "Avg.BasketSize", ylab = "Frequency", main = ""
)
par(old_par)

# Min, quartiles and max of the average basket size (shopping mall stores)
quantile(Saledata1$Avg.Basket.Size, probs = seq(0, 1, by = 0.25))
##        0%       25%       50%       75%      100% 
##  0.000000  2.000000  2.666667  3.650082 19.000000
# Same five-number summary for the independent stores
quantile(Saledata2$Avg.Basket.Size, probs = seq(0, 1, by = 0.25))
##   0%  25%  50%  75% 100% 
##    0    2    3    4   26

2.3 Shopping Mall

# 1. Peak time - week day
SHOPPING_MALL <- Day_WHEN_PeakTime %>%
  filter(Store.Type == "SHOPPING_MALL")
# Wide to long (one row per Day and Condition), then keep the traffic rows
SHOPPING_MALL_long <- SHOPPING_MALL %>%
  gather(Condition, Measurement, sumTraffic:sumTransaction, factor_key = TRUE)

SHOPPING_MALL_long <- SHOPPING_MALL_long %>%
  filter(Condition == "sumTraffic")

# Bar chart of total traffic per week day, labelled with the totals
ggplot(SHOPPING_MALL_long, aes(x = Day, y = Measurement, fill = Condition)) +
  geom_bar(position = "stack", stat = "identity") +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_color_manual(values = c("#0073C2FF")) +
  scale_fill_manual(values = c("#0073C2FF")) +
  geom_text(
    aes(label = Measurement),
    position = position_dodge(0.8),
    vjust = -0.3,
    size = 5.0
  )

# Line chart of the same totals
ggplot(data = SHOPPING_MALL_long, aes(x = Day, y = Measurement, group = 1)) +
  geom_line() +
  geom_point() +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_color_manual(values = c("#0073C2FF")) +
  scale_fill_manual(values = c("#0073C2FF")) +
  geom_text(
    aes(label = Measurement),
    position = position_dodge(0.8),
    vjust = -0.3,
    size = 4.0
  )

# 2. Peak time - hour of the day
SHOPPING_MALL <- WHEN_PeakTime %>% filter(Store.Type == "SHOPPING_MALL")
# Wide to long: one row per Hour and Condition (traffic/interaction/transaction)
SHOPPING_MALL_long <- gather(
  SHOPPING_MALL, Condition, Measurement, sumTraffic:sumTransaction,
  factor_key = TRUE
)

# Hourly traffic as a filled area with its outline.
# The area is added first: layers are drawn in the order they are added, so
# adding it after the line would paint the fill over the line.
ggplot(data = SHOPPING_MALL, aes(x = Hour, y = sumTraffic)) +
  geom_area(fill = "lightpink") +
  geom_line(color = "#00AFBB", size = 0.7) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Hourly traffic, interaction and transaction totals as three labelled lines
ggplot(SHOPPING_MALL_long, aes(x = Hour, y = Measurement)) +
  geom_line(aes(color = Condition), size = 1.0) +
  scale_color_manual(values = c("#EFC000FF", "#6A00A8FF", "#B12A90FF")) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_text(
    aes(label = Measurement),
    position = position_dodge(0.8),
    vjust = -0.3,
    size = 2.0
  )

# 3. Conversion rate according to time
# Share of traffic that did not turn into an interaction, as a "NN%" label
SHOPPING_MALL <- SHOPPING_MALL %>%
  mutate(
    noInteraction_Rate = paste0(
      round(((sumTraffic - sumInteraction) / sumTraffic) * 100, 0),
      "%"
    )
  )

# Transactions as a percentage of traffic and of interactions
SHOPPING_MALL_Performance <- SHOPPING_MALL %>%
  mutate(
    Traffic_conRate = round((sumTransaction / sumTraffic) * 100, 2),
    Interaction_conRate = round((sumTransaction / sumInteraction) * 100, 2)
  )
SHOPPING_MALL_Performance
## # A tibble: 15 x 8
## # Groups:   Store.Type [1]
##    Store.Type  Hour sumTraffic sumInteraction sumTransaction noInteraction_R…
##    <fct>      <int>      <int>          <int>          <int> <chr>           
##  1 SHOPPING_…     8         27             18              3 33%             
##  2 SHOPPING_…     9        437            357            114 18%             
##  3 SHOPPING_…    10       3372           2855            925 15%             
##  4 SHOPPING_…    11       3912           3335           1084 15%             
##  5 SHOPPING_…    12       3495           2994            956 14%             
##  6 SHOPPING_…    13       3715           3171            967 15%             
##  7 SHOPPING_…    14       3852           3308           1000 14%             
##  8 SHOPPING_…    15       3940           3417           1006 13%             
##  9 SHOPPING_…    16       3809           3286            996 14%             
## 10 SHOPPING_…    17       3865           3243           1011 16%             
## 11 SHOPPING_…    18       4884           4096           1250 16%             
## 12 SHOPPING_…    19       7532           6395           1974 15%             
## 13 SHOPPING_…    20       9217           7842           2316 15%             
## 14 SHOPPING_…    21       4665           4015           1189 14%             
## 15 SHOPPING_…    22         45             44             22 2%              
## # … with 2 more variables: Traffic_conRate <dbl>, Interaction_conRate <dbl>
# 4. Funnel traffic
# Overall totals of the three funnel stages for the shopping mall stores
SHOPPING_MALL_F <- SHOPPING_MALL %>%
  group_by(Store.Type) %>%
  summarise(
    Traffic = sum(sumTraffic),
    Interaction = sum(sumInteraction),
    Convert = sum(sumTransaction)
  )
SHOPPING_MALL_F[["Store.Type"]] <- NULL
# Convert wide to long: one row per funnel stage
SHOPPING_MALL_F <- SHOPPING_MALL_F %>%
  gather(Condition, Measurement, Traffic:Convert, factor_key = TRUE)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Funnel chart of the shopping mall totals (Traffic -> Interaction -> Convert)
fig <- plot_ly() %>%
  add_trace(
    type = "funnel",
    y = SHOPPING_MALL_F$Condition,
    x = SHOPPING_MALL_F$Measurement,
    textposition = "inside",
    textinfo = "value+percent initial",
    opacity = 0.65,
    # One styling entry per funnel stage. The funnel has three stages, so the
    # surplus entries (and the mismatched vector lengths) were dropped.
    marker = list(
      color = c("deepskyblue", "lightsalmon", "tan"),
      line = list(
        width = c(4, 2, 2),
        color = c("wheat", "wheat", "blue")
      )
    ),
    connector = list(line = list(color = "royalblue", dash = "dot", width = 3))
  ) %>%
  # Keep the stages in the order they appear in SHOPPING_MALL_F
  layout(yaxis = list(categoryarray = SHOPPING_MALL_F$Condition))
fig

2.4 Independent Store

# 1. Peak time - week day
INDEPENDENT <- Day_WHEN_PeakTime %>%
  filter(Store.Type == "INDEPENDENT")
# Wide to long (one row per Day and Condition), then keep the traffic rows
INDEPENDENT_long <- INDEPENDENT %>%
  gather(Condition, Measurement, sumTraffic:sumTransaction, factor_key = TRUE)

INDEPENDENT_long <- INDEPENDENT_long %>%
  filter(Condition == "sumTraffic")

# Bar chart of total traffic per week day, labelled with the totals
ggplot(INDEPENDENT_long, aes(x = Day, y = Measurement, fill = Condition)) +
  geom_bar(position = "stack", stat = "identity") +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_color_manual(values = c("#EFC000FF")) +
  scale_fill_manual(values = c("#EFC000FF")) +
  geom_text(
    aes(label = Measurement),
    position = position_dodge(0.8),
    vjust = -0.3,
    size = 3.5
  )

# Line chart of the same totals
ggplot(data = INDEPENDENT_long, aes(x = Day, y = Measurement, group = 1)) +
  geom_line() +
  geom_point() +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_color_manual(values = c("#0073C2FF")) +
  scale_fill_manual(values = c("#0073C2FF")) +
  geom_text(
    aes(label = Measurement),
    position = position_dodge(0.8),
    vjust = -0.3,
    size = 4.0
  )

# 2. Peak time - hour of the day
INDEPENDENT <- WHEN_PeakTime %>% filter(Store.Type == "INDEPENDENT")
# Wide to long: one row per Hour and Condition (traffic/interaction/transaction)
INDEPENDENT_long <- gather(
  INDEPENDENT, Condition, Measurement, sumTraffic:sumTransaction,
  factor_key = TRUE
)

# Hourly traffic as a filled area with its outline.
# The area is added first: layers are drawn in the order they are added, so
# adding it after the line would paint the fill over the line.
ggplot(data = INDEPENDENT, aes(x = Hour, y = sumTraffic)) +
  geom_area(fill = "lightblue") +
  geom_line(color = "#00AFBB", size = 0.3) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Hourly traffic, interaction and transaction totals as three lines
ggplot(INDEPENDENT_long, aes(x = Hour, y = Measurement)) +
  geom_line(aes(color = Condition), size = 1.0) +
  scale_color_manual(values = c("#b9005f", "#5fb900", "#005fb9")) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# 3. Conversion rate according to time
# Share of traffic that did not turn into an interaction, as a "NN%" label
INDEPENDENT <- INDEPENDENT %>%
  mutate(
    noInteraction_Rate = paste0(
      round(((sumTraffic - sumInteraction) / sumTraffic) * 100, 0),
      "%"
    )
  )
# Transactions as a percentage of traffic and of interactions
INDEPENDENT_Performance <- INDEPENDENT %>%
  mutate(
    Traffic_conRate = round((sumTransaction / sumTraffic) * 100, 2),
    Interaction_conRate = round((sumTransaction / sumInteraction) * 100, 2)
  )
INDEPENDENT_Performance
## # A tibble: 15 x 8
## # Groups:   Store.Type [1]
##    Store.Type  Hour sumTraffic sumInteraction sumTransaction noInteraction_R…
##    <fct>      <int>      <int>          <int>          <int> <chr>           
##  1 INDEPENDE…     8        188            148             47 21%             
##  2 INDEPENDE…     9       1665           1457            603 12%             
##  3 INDEPENDE…    10       2495           2274            968 9%              
##  4 INDEPENDE…    11       2453           2225            966 9%              
##  5 INDEPENDE…    12       1942           1785            806 8%              
##  6 INDEPENDE…    13       1660           1518            675 9%              
##  7 INDEPENDE…    14       1911           1746            683 9%              
##  8 INDEPENDE…    15       2447           2260            915 8%              
##  9 INDEPENDE…    16       2993           2780           1206 7%              
## 10 INDEPENDE…    17       3949           3605           1661 9%              
## 11 INDEPENDE…    18       4479           4153           1972 7%              
## 12 INDEPENDE…    19       5954           5512           2453 7%              
## 13 INDEPENDE…    20       6874           6342           2654 8%              
## 14 INDEPENDE…    21       4078           3694           1468 9%              
## 15 INDEPENDE…    22         77             72             35 6%              
## # … with 2 more variables: Traffic_conRate <dbl>, Interaction_conRate <dbl>
# 4. Funnel traffic
# Overall totals of the three funnel stages for the independent stores
INDEPENDENT_F <- INDEPENDENT %>%
  group_by(Store.Type) %>%
  summarise(
    Traffic = sum(sumTraffic),
    Interaction = sum(sumInteraction),
    Convert = sum(sumTransaction)
  )
INDEPENDENT_F[["Store.Type"]] <- NULL
# Convert wide to long: one row per funnel stage
INDEPENDENT_F <- INDEPENDENT_F %>%
  gather(Condition, Measurement, Traffic:Convert, factor_key = TRUE)
library(plotly)
# Funnel chart of the independent-store totals
# (Traffic -> Interaction -> Convert)
fig <- plot_ly() %>%
  add_trace(
    type = "funnel",
    y = INDEPENDENT_F$Condition,
    x = INDEPENDENT_F$Measurement,
    textposition = "inside",
    textinfo = "value+percent initial",
    opacity = 0.65,
    # One styling entry per funnel stage. The funnel has three stages, so the
    # surplus entries (and the mismatched vector lengths) were dropped.
    marker = list(
      color = c("deepskyblue", "lightsalmon", "tan"),
      line = list(
        width = c(4, 2, 2),
        color = c("wheat", "wheat", "blue")
      )
    ),
    connector = list(line = list(color = "royalblue", dash = "dot", width = 3))
  ) %>%
  # Keep the stages in the order they appear in INDEPENDENT_F
  layout(yaxis = list(categoryarray = INDEPENDENT_F$Condition))
fig

2.5 Comparison

# 1. Funnel: shopping mall and independent stores in one chart
library(plotly)
fig <- plot_ly(
  type = "funnel",
  name = "Shopping_Mall",
  y = SHOPPING_MALL_F$Condition,
  x = SHOPPING_MALL_F$Measurement,
  textinfo = "value+percent initial"
) %>%
  add_trace(
    type = "funnel",
    name = "Independent",
    orientation = "h",
    y = INDEPENDENT_F$Condition,
    x = INDEPENDENT_F$Measurement,
    textposition = "inside",
    # Same text mode as the first trace: mixing "percent initial" with
    # "percent previous" would put non-comparable percentages side by side.
    textinfo = "value+percent initial"
  ) %>%
  # Fix the order of the funnel stages on the y axis
  layout(yaxis = list(categoryarray = c("Traffic", "Interaction", "Convert")))
fig
# 2. Interaction 
# Interaction rate (interactions / traffic) by day of week and store type
Saledata_1 <- Saledata %>%
  group_by(Store.Type, Day) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  ) %>%
  mutate(
    # Numeric rate in percent: used for the y position so the axis is
    # continuous. Mapping the pasted "x %" text to y gives a discrete axis
    # ordered alphabetically, which distorts the lines.
    Interaction_conRate_num = round((sumInteraction / sumTraffic) * 100, 1),
    # Text version, used only for the point labels
    Interaction_conRate = paste(Interaction_conRate_num, "%")
  )

Saledata_1$sumTraffic <- NULL
Saledata_1$sumInteraction <- NULL
Saledata_1$sumTransaction <- NULL

# Line chart, one line per store type.
# theme_bw() is a complete theme and resets earlier theme() settings, so it is
# applied before legend.position rather than after it.
ggplot(
  Saledata_1,
  aes(x = Day, y = Interaction_conRate_num, group = Store.Type)
) +
  geom_line(aes(linetype = Store.Type, color = Store.Type)) +
  geom_point(aes(color = Store.Type)) +
  geom_text(
    aes(label = Interaction_conRate),
    position = position_dodge(0.8),
    vjust = -0.3,
    size = 3.5
  ) +
  ylab("Interaction_conRate (%)") +
  theme_bw() +
  theme(
    legend.position = "top",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Interaction rate (interactions / traffic) by hour of day and store type
Saledata_1 <- Saledata %>%
  group_by(Store.Type, Hour) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  ) %>%
  mutate(
    # Numeric rate in percent: used for the y position so the axis is
    # continuous. Mapping the pasted "x %" text to y gives a discrete axis
    # ordered alphabetically, which distorts the lines.
    Interaction_conRate_num = round((sumInteraction / sumTraffic) * 100, 1),
    # Text version, used only for the point labels
    Interaction_conRate = paste(Interaction_conRate_num, "%")
  )

Saledata_1$sumTraffic <- NULL
Saledata_1$sumInteraction <- NULL
Saledata_1$sumTransaction <- NULL

# Line chart, one line per store type.
# theme_bw() is a complete theme and resets earlier theme() settings, so it is
# applied before legend.position rather than after it.
ggplot(
  Saledata_1,
  aes(x = Hour, y = Interaction_conRate_num, group = Store.Type)
) +
  geom_line(aes(linetype = Store.Type, color = Store.Type)) +
  geom_point(aes(color = Store.Type)) +
  geom_text(
    aes(label = Interaction_conRate),
    position = position_dodge(0.8),
    vjust = -0.3,
    size = 3.5
  ) +
  ylab("Interaction_conRate (%)") +
  theme_bw() +
  theme(
    legend.position = "top",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# 3. Transaction
# Transaction rate (transactions / interactions) by day of week and store type
Saledata_1 <- Saledata %>%
  group_by(Store.Type, Day) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  ) %>%
  mutate(
    # Numeric rate in percent: used for the y position so the axis is
    # continuous. Mapping the pasted "x %" text to y gives a discrete axis
    # ordered alphabetically, which distorts the lines.
    Transaction_conRate_num = round((sumTransaction / sumInteraction) * 100, 1),
    # Text version, used only for the point labels
    Transaction_conRate = paste(Transaction_conRate_num, "%")
  )

Saledata_1$sumTraffic <- NULL
Saledata_1$sumInteraction <- NULL
Saledata_1$sumTransaction <- NULL

# Line chart, one line per store type.
# theme_bw() is a complete theme and resets earlier theme() settings, so it is
# applied before legend.position rather than after it.
ggplot(
  Saledata_1,
  aes(x = Day, y = Transaction_conRate_num, group = Store.Type)
) +
  geom_line(aes(linetype = Store.Type, color = Store.Type)) +
  geom_point(aes(color = Store.Type)) +
  geom_text(
    aes(label = Transaction_conRate),
    position = position_dodge(0.8),
    vjust = -0.3,
    size = 3.5
  ) +
  ylab("Transaction_conRate (%)") +
  theme_bw() +
  theme(
    legend.position = "top",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Transaction rate (transactions / interactions) by hour of day and store type
Saledata_1 <- Saledata %>%
  group_by(Store.Type, Hour) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  ) %>%
  mutate(
    # Numeric rate in percent: used for the y position so the axis is
    # continuous. Mapping the pasted "x %" text to y gives a discrete axis
    # ordered alphabetically, which distorts the lines.
    Transaction_conRate_num = round((sumTransaction / sumInteraction) * 100, 2),
    # Text version, used only for the point labels
    Transaction_conRate = paste(Transaction_conRate_num, "%")
  )

Saledata_1$sumTraffic <- NULL
Saledata_1$sumInteraction <- NULL
Saledata_1$sumTransaction <- NULL

# Line chart, one line per store type.
# theme_bw() is a complete theme and resets earlier theme() settings, so it is
# applied before legend.position rather than after it.
ggplot(
  Saledata_1,
  aes(x = Hour, y = Transaction_conRate_num, group = Store.Type)
) +
  geom_line(aes(linetype = Store.Type, color = Store.Type)) +
  geom_point(aes(color = Store.Type)) +
  geom_text(
    aes(label = Transaction_conRate),
    position = position_dodge(0.8),
    vjust = -0.3,
    size = 3.5
  ) +
  ylab("Transaction_conRate (%)") +
  theme_bw() +
  theme(
    legend.position = "top",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# 4. Performance over period
# Totals per store type and period
Saledata_2 <- Saledata %>%
  group_by(Store.Type, Period) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  )
# Change long to wide                                    
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Melt on the two key columns, then cast to one row per store type with one
# column per Period/measure combination
meltSaledata2 <- melt(Saledata_2, id.vars = c("Store.Type", "Period"))
u <- dcast(
  meltSaledata2,
  Store.Type ~ Period + variable,
  fun.aggregate = sum
)
library(writexl)
# Write the wide table to an Excel file in the working directory
write_xlsx(u, path = "u.xlsx", col_names = TRUE)
## [1] "/Users/admin/Documents/Linh-R Studio/SaleData/u.xlsx"
# Store.ID: totals per independent store and period
Saledata_3 <- Saledata %>%
  filter(Store.Type == "INDEPENDENT") %>%
  group_by(Store.ID, Period) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  )
# Melt on the two key columns, then cast to one row per store with one column
# per Period/measure combination
meltSaledata3 <- melt(Saledata_3, id.vars = c("Store.ID", "Period"))
z <- dcast(
  meltSaledata3,
  Store.ID ~ Period + variable,
  fun.aggregate = sum
)
library(writexl)
# Write the wide table to an Excel file in the working directory
write_xlsx(z, path = "z2.xlsx", col_names = TRUE)
## [1] "/Users/admin/Documents/Linh-R Studio/SaleData/z2.xlsx"

3 Operation

3.1 Data Preparation

# Keep only the shopping mall stores
Shopping_mall <- Saledata %>% filter(Store.Type == "SHOPPING_MALL")
library(dplyr)
# df1: every shopping mall row plus its store's min/max date
# (the result stays grouped by Store.ID)
df1 <- Shopping_mall %>%
  group_by(Store.ID) %>%
  mutate(
    minDia = min(Date, na.rm = TRUE),
    maxDia = max(Date, na.rm = TRUE)
  )
# df2: one row per store. df1 already carries minDia/maxDia, so they are
# reused here instead of being computed a second time.
df2 <- df1 %>%
  ungroup() %>%
  distinct(Store.ID, minDia, maxDia)

3.2 Traffic

3.2.1 Volume

# Total traffic per shopping mall store
Traffic <- Shopping_mall %>%
  group_by(Store.ID) %>%
  summarise(TotalTraffic = sum(Traffic))
library(treemap)
# Treemap with one tile per store, sized and coloured by total traffic
treemap(
  Traffic,
  index = "Store.ID", # single index
  vSize = "TotalTraffic",
  vColor = "TotalTraffic",
  palette = "Purples",
  type = "value"
)

# Line chart by date: total traffic per store and date
Traffic <- Shopping_mall %>%
  group_by(Store.ID, Date) %>%
  summarise(TotalTraffic = sum(Traffic))
library("gridExtra")
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Layers shared by the four period panels: one line per store on a bare theme
traffic_panel_layers <- list(
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7),
  theme_bw(),
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
)

# Dates up to 2020-05-17
t1 <- ggplot(
  data = Traffic[which(Traffic$Date <= "2020-05-17"), ],
  aes(x = Date, y = TotalTraffic)
) + traffic_panel_layers

# 2020-05-20 to 2020-05-24
t2 <- ggplot(
  data = Traffic[
    which(Traffic$Date <= "2020-05-24" & Traffic$Date >= "2020-05-20"),
  ],
  aes(x = Date, y = TotalTraffic)
) + traffic_panel_layers

# 2020-06-05 to 2020-06-14
t3 <- ggplot(
  data = Traffic[
    which(Traffic$Date <= "2020-06-14" & Traffic$Date >= "2020-06-05"),
  ],
  aes(x = Date, y = TotalTraffic)
) + traffic_panel_layers

# 2020-06-19 to 2020-06-28
t4 <- ggplot(
  data = Traffic[
    which(Traffic$Date <= "2020-06-28" & Traffic$Date >= "2020-06-19"),
  ],
  aes(x = Date, y = TotalTraffic)
) + traffic_panel_layers

# Stack the panels two at a time
grid.arrange(t1, t2)

grid.arrange(t3, t4)

3.2.2 Ranking

# Total traffic per store and date (input of the ranking chart)
Traffic <- Shopping_mall %>%
  group_by(Store.ID, Date) %>%
  summarise(TotalTraffic = sum(Traffic))
# Demo for "2020-05-13" to "2020-05-17"
table(Saledata[["Period"]])
## 
## June 05 - June 14 June 19 - June 28   May 13 - May 17   May 20 - May 24 
##              2709              3351              1386              1491
# Build a ggplot2 theme on top of theme_bw(base_size = 15): white backgrounds,
# no y grid lines, no axis ticks, no legend, and custom title/axis/strip text.
# Takes no arguments and returns the theme object.
my_theme <- function() {
  # Colours shared by several elements
  color.background <- "white"
  color.text <- "#22211d"

  # Rectangle filled and outlined in the background colour
  background_rect <- element_rect(
    fill = color.background,
    color = color.background
  )

  theme_bw(base_size = 15) +
    theme(
      # Background colours
      panel.background = background_rect,
      plot.background = background_rect,
      panel.border = element_rect(color = color.background),
      strip.background = background_rect,

      # Grid and ticks
      panel.grid.major.y = element_blank(),
      panel.grid.minor.y = element_blank(),
      axis.ticks = element_blank(),

      # Legend
      legend.position = "none",

      # Title and axis labels
      plot.title = element_text(color = color.text, size = 20, face = "bold"),
      axis.title.x = element_text(size = 14, color = "black", face = "bold"),
      axis.title.y = element_text(
        size = 14, color = "black", face = "bold", vjust = 1.25
      ),
      axis.text.x = element_text(
        size = 10, vjust = 0.5, hjust = 0.5, color = color.text
      ),
      axis.text.y = element_text(size = 10, color = color.text),
      strip.text = element_text(face = "bold"),

      # Plot margins
      plot.margin = unit(c(0.35, 0.2, 0.3, 0.35), "cm")
    )
}


# Rows of the first period (dates up to 2020-05-17)
Traffic1 <- subset(Traffic, Date <= "2020-05-17")

# day = chronological index of each date within the period.
# The dates are sorted before matching: unique() keeps first-appearance order,
# so an index built on it is only chronological if the first store in the data
# happens to have every date. The grouping is dropped because the index is
# shared by all stores (base transform() ignored the grouping anyway).
Transformed <- Traffic1 %>%
  ungroup() %>%
  mutate(day = match(Date, sort(unique(Date))))

# Rank the stores within each date by total traffic
# (highest first, ties broken by Store.ID)
df.rankings <- Transformed %>%
  group_by(Date) %>%
  arrange(Date, desc(TotalTraffic), Store.ID) %>%
  mutate(ranking = row_number()) %>%
  as.data.frame()

df.rankings$Store.ID <- as.character(df.rankings$Store.ID)

# Bump chart: one line per store, rank 1 at the top.
# alpha is a constant, so it is set outside aes() (inside aes() it adds a
# meaningless "alpha" legend). The breaks run up to the number of stores,
# which is the largest rank a store can get.
ggplot(data = df.rankings, aes(x = day, y = ranking, group = Store.ID)) +
  geom_line(aes(color = Store.ID), alpha = 1, size = 1) +
  geom_point(aes(color = Store.ID), alpha = 1, size = 3) +
  scale_y_reverse(breaks = seq_len(n_distinct(df.rankings$Store.ID))) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

3.3 Interaction

3.3.1 Volume

# Shopping mall rows only
Shopping_mall <- Saledata %>%
  filter(Store.Type == "SHOPPING_MALL")
# Per store: total interactions, total traffic and interactions as a
# percentage of traffic
Mall_Interaction <- Shopping_mall %>%
  group_by(Store.ID) %>%
  summarise(
    Total_Interaction = sum(Interaction),
    TotalTraffic = sum(Traffic)
  ) %>%
  mutate(
    Interaction_conRate = round((Total_Interaction / TotalTraffic) * 100, 2)
  )
Mall_Interaction
## # A tibble: 11 x 4
##    Store.ID Total_Interaction TotalTraffic Interaction_conRate
##    <chr>                <int>        <int>               <dbl>
##  1 108                   1215         1360                89.3
##  2 220                    986         1147                86.0
##  3 228                    691          837                82.6
##  4 38                   11794        14704                80.2
##  5 79                    2953         3637                81.2
##  6 83                    8389         9406                89.2
##  7 84                    9818        11488                85.5
##  8 90                    3549         4162                85.3
##  9 95                    1498         1591                94.2
## 10 96                    4070         4610                88.3
## 11 97                    3413         3825                89.2
library(treemap)
# Treemap with one tile per store, sized and coloured by interaction rate
treemap(
  Mall_Interaction,
  index = "Store.ID", # single index
  vSize = "Interaction_conRate",
  vColor = "Interaction_conRate",
  palette = "Set3",
  type = "value"
)

# Interaction line graph by date: rate per store and date
Mall_Interaction <- Shopping_mall %>%
  group_by(Store.ID, Date) %>%
  summarise(
    Total_Interaction = sum(Interaction),
    TotalTraffic = sum(Traffic)
  ) %>%
  mutate(
    Interaction_conRate = round((Total_Interaction / TotalTraffic) * 100, 2)
  )

# Layers shared by the four period panels: one line per store on a bare theme
interaction_panel_layers <- list(
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7),
  theme_bw(),
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
)

# Dates up to 2020-05-17
a1 <- ggplot(
  data = Mall_Interaction[which(Mall_Interaction$Date <= "2020-05-17"), ],
  aes(x = Date, y = Interaction_conRate)
) + interaction_panel_layers

# 2020-05-20 to 2020-05-24
a2 <- ggplot(
  data = Mall_Interaction[
    which(
      Mall_Interaction$Date <= "2020-05-24" &
        Mall_Interaction$Date >= "2020-05-20"
    ),
  ],
  aes(x = Date, y = Interaction_conRate)
) + interaction_panel_layers

# 2020-06-05 to 2020-06-14
a3 <- ggplot(
  data = Mall_Interaction[
    which(
      Mall_Interaction$Date <= "2020-06-14" &
        Mall_Interaction$Date >= "2020-06-05"
    ),
  ],
  aes(x = Date, y = Interaction_conRate)
) + interaction_panel_layers

# 2020-06-19 to 2020-06-28
a4 <- ggplot(
  data = Mall_Interaction[
    which(
      Mall_Interaction$Date <= "2020-06-28" &
        Mall_Interaction$Date >= "2020-06-19"
    ),
  ],
  aes(x = Date, y = Interaction_conRate)
) + interaction_panel_layers

# Stack the panels two at a time
grid.arrange(a1, a2)

grid.arrange(a3, a4)

3.3.2 Ranking

# Demo: rows with Date <= "2020-05-17"
Mall_Interaction1 <- subset(Mall_Interaction, Date <= "2020-05-17")
# day = chronological index of each date within the period.
# The dates are sorted before matching: unique() keeps first-appearance order,
# so an index built on it is only chronological if the first store in the data
# happens to have every date. The grouping is dropped because the index is
# shared by all stores (base transform() ignored the grouping anyway).
Transformed <- Mall_Interaction1 %>%
  ungroup() %>%
  mutate(day = match(Date, sort(unique(Date))))

# Rank the stores within each date by interaction rate
# (highest first, ties broken by Store.ID)
df.rankings <- Transformed %>%
  group_by(Date) %>%
  arrange(Date, desc(Interaction_conRate), Store.ID) %>%
  mutate(ranking = row_number()) %>%
  as.data.frame()

df.rankings$Store.ID <- as.character(df.rankings$Store.ID)

# Bump chart: one line per store, rank 1 at the top.
# alpha is a constant, so it is set outside aes() (inside aes() it adds a
# meaningless "alpha" legend). The breaks run up to the number of stores,
# which is the largest rank a store can get.
ggplot(data = df.rankings, aes(x = day, y = ranking, group = Store.ID)) +
  geom_line(aes(color = Store.ID), alpha = 1, size = 1.2) +
  geom_point(aes(color = Store.ID), alpha = 1, size = 1.5) +
  scale_y_reverse(breaks = seq_len(n_distinct(df.rankings$Store.ID)))

3.4 Transaction

3.4.1 Volume

# Shopping mall rows only
Shopping_mall <- Saledata %>%
  filter(Store.Type == "SHOPPING_MALL")
# Per store: total transactions, total interactions and transactions as a
# percentage of interactions
Mall_Transaction <- Shopping_mall %>%
  group_by(Store.ID) %>%
  summarise(
    Total_Transaction = sum(Transaction),
    Total_Interaction = sum(Interaction)
  ) %>%
  mutate(
    Transaction_conRate = round(
      (Total_Transaction / Total_Interaction) * 100, 2
    )
  )

library(treemap)
# Treemap with one tile per store, sized and coloured by transaction rate
treemap(
  Mall_Transaction,
  index = "Store.ID", # single index
  vSize = "Transaction_conRate",
  vColor = "Transaction_conRate",
  palette = "RdBu",
  type = "value" # with type = "value" there is a scale bar
)

### By date: transaction rate per store and date
Mall_Transaction <- Shopping_mall %>%
  group_by(Store.ID, Date) %>%
  summarise(
    Total_Transaction = sum(Transaction),
    Total_Interaction = sum(Interaction)
  ) %>%
  mutate(
    Transaction_conRate = round(
      (Total_Transaction / Total_Interaction) * 100, 2
    )
  )

# Layers shared by the four period panels: one line per store on a bare theme
transaction_panel_layers <- list(
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7),
  theme_bw(),
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
)

# Dates up to 2020-05-17
c1 <- ggplot(
  data = Mall_Transaction[which(Mall_Transaction$Date <= "2020-05-17"), ],
  aes(x = Date, y = Transaction_conRate)
) + transaction_panel_layers

# 2020-05-20 to 2020-05-24
c2 <- ggplot(
  data = Mall_Transaction[
    which(
      Mall_Transaction$Date <= "2020-05-24" &
        Mall_Transaction$Date >= "2020-05-20"
    ),
  ],
  aes(x = Date, y = Transaction_conRate)
) + transaction_panel_layers

# 2020-06-05 to 2020-06-14
c3 <- ggplot(
  data = Mall_Transaction[
    which(
      Mall_Transaction$Date <= "2020-06-14" &
        Mall_Transaction$Date >= "2020-06-05"
    ),
  ],
  aes(x = Date, y = Transaction_conRate)
) + transaction_panel_layers

# 2020-06-19 to 2020-06-28
c4 <- ggplot(
  data = Mall_Transaction[
    which(
      Mall_Transaction$Date <= "2020-06-28" &
        Mall_Transaction$Date >= "2020-06-19"
    ),
  ],
  aes(x = Date, y = Transaction_conRate)
) + transaction_panel_layers

# Stack the panels two at a time
grid.arrange(c1, c2)

grid.arrange(c3, c4)

3.4.2 Ranking

# Demo: rows with Date <= "2020-06-14" & Date >= "2020-06-05"
Mall_Transaction1 <- subset(
  Mall_Transaction,
  Date <= "2020-06-14" & Date >= "2020-06-05"
)
# day = chronological index of each date within the period.
# The dates are sorted before matching: unique() keeps first-appearance order,
# so an index built on it is only chronological if the first store in the data
# happens to have every date. The grouping is dropped because the index is
# shared by all stores (base transform() ignored the grouping anyway).
Transformed <- Mall_Transaction1 %>%
  ungroup() %>%
  mutate(day = match(Date, sort(unique(Date))))

# Rank the stores within each date by transaction rate
# (highest first, ties broken by Store.ID)
df.rankings <- Transformed %>%
  group_by(Date) %>%
  arrange(Date, desc(Transaction_conRate), Store.ID) %>%
  mutate(ranking = row_number()) %>%
  as.data.frame()

df.rankings$Store.ID <- as.character(df.rankings$Store.ID)

# Bump chart over the actual dates: one line per store, rank 1 at the top.
# alpha is a constant, so it is set outside aes() (inside aes() it adds a
# meaningless "alpha" legend). The breaks run up to the number of stores,
# which is the largest rank a store can get.
ggplot(data = df.rankings, aes(x = Date, y = ranking, group = Store.ID)) +
  geom_line(aes(color = Store.ID), alpha = 1, size = 1.2) +
  geom_point(aes(color = Store.ID), alpha = 1, size = 1.5) +
  scale_y_reverse(breaks = seq_len(n_distinct(df.rankings$Store.ID)))

3.5 Cluster Analysis

# Shopping mall rows only (input of the cluster analysis)
Shopping_mall <- Saledata %>%
  filter(Store.Type == "SHOPPING_MALL")

library(tidyverse)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
# One row per store: summed traffic/interactions/transactions and the mean of
# the average basket size
Shopping_mall_test <- Shopping_mall %>%
  group_by(Store.ID) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction),
    meanBasetSize = mean(Avg.Basket.Size)
  )

Shopping_mall_test[["Store.ID"]] <- as.character(
  Shopping_mall_test[["Store.ID"]]
)

# library() stops with an error if knitr is missing; require() only returns
# FALSE and lets the script carry on to fail later at kable().
library(knitr)
# Show the first rows of the per-store table
kable(head(Shopping_mall_test))
Store.ID sumTraffic sumInteraction sumTransaction meanBasetSize
108 1360 1215 402 2.098882
220 1147 986 0 0.000000
228 837 691 110 1.968690
38 14704 11794 4298 2.901002
79 3637 2953 627 2.426727
83 9406 8389 2405 2.641927
# Column-wise summary of the per-store table
summary(object = Shopping_mall_test)
##    Store.ID           sumTraffic    sumInteraction  sumTransaction  
##  Length:11          Min.   :  837   Min.   :  691   Min.   :   0.0  
##  Class :character   1st Qu.: 1476   1st Qu.: 1356   1st Qu.: 409.5  
##  Mode  :character   Median : 3825   Median : 3413   Median :1000.0  
##                     Mean   : 5161   Mean   : 4398   Mean   :1346.6  
##                     3rd Qu.: 7008   3rd Qu.: 6230   3rd Qu.:2071.0  
##                     Max.   :14704   Max.   :11794   Max.   :4298.0  
##  meanBasetSize  
##  Min.   :0.000  
##  1st Qu.:2.263  
##  Median :2.642  
##  Mean   :2.371  
##  3rd Qu.:2.864  
##  Max.   :2.984
# Drop rows that contain a missing value, then summarise again
Shopping_mall_test <- Shopping_mall_test %>% na.omit()
summary(object = Shopping_mall_test)
##    Store.ID           sumTraffic    sumInteraction  sumTransaction  
##  Length:11          Min.   :  837   Min.   :  691   Min.   :   0.0  
##  Class :character   1st Qu.: 1476   1st Qu.: 1356   1st Qu.: 409.5  
##  Mode  :character   Median : 3825   Median : 3413   Median :1000.0  
##                     Mean   : 5161   Mean   : 4398   Mean   :1346.6  
##                     3rd Qu.: 7008   3rd Qu.: 6230   3rd Qu.:2071.0  
##                     Max.   :14704   Max.   :11794   Max.   :4298.0  
##  meanBasetSize  
##  Min.   :0.000  
##  1st Qu.:2.263  
##  Median :2.642  
##  Mean   :2.371  
##  3rd Qu.:2.864  
##  Max.   :2.984
# Work on a plain data.frame: tibbles do not support row names (setting them
# is deprecated and they are dropped on subsetting), so the Store.ID labels
# never reached the scaled matrix.
dsData <- as.data.frame(Shopping_mall_test)

row.names(dsData) <- Shopping_mall_test$Store.ID
# Standardise the four numeric columns (columns 2 to 5). The resulting matrix
# keeps the Store.ID row names, so results computed from it are labelled by
# store.
dsData <- scale(dsData[, 2:5])
# Column-wise summary of the scaled matrix
summary(object = dsData)
##    sumTraffic      sumInteraction    sumTransaction    meanBasetSize    
##  Min.   :-0.9292   Min.   :-0.9641   Min.   :-1.0098   Min.   :-2.7770  
##  1st Qu.:-0.7919   1st Qu.:-0.7910   1st Qu.:-0.7027   1st Qu.:-0.1267  
##  Median :-0.2870   Median :-0.2561   Median :-0.2599   Median : 0.3173  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.3970   3rd Qu.: 0.4764   3rd Qu.: 0.5432   3rd Qu.: 0.5776  
##  Max.   : 2.0509   Max.   : 1.9237   Max.   : 2.2131   Max.   : 0.7176
# Fix the random seed so the random starts are reproducible
set.seed(123)
# k-means with 5 centres, best of 25 random starts
km.res <- kmeans(x = dsData, centers = 5, nstart = 25)
km.res
## K-means clustering with 5 clusters of sizes 1, 3, 1, 4, 2
## 
## Cluster means:
##   sumTraffic sumInteraction sumTransaction meanBasetSize
## 1  2.0508877      1.9237355      2.2130817     0.6207611
## 2 -0.8376814     -0.8487407     -0.7775723    -0.2026117
## 3 -0.8625384     -0.8874087     -1.0097760    -2.7769747
## 4 -0.2368513     -0.2344965     -0.1800664     0.4322821
## 5  1.1360500      1.2239406      0.9248384     0.5174601
## 
## Clustering vector:
##  [1] 2 3 2 1 4 5 5 4 2 4 4
## 
## Within cluster sum of squares by cluster:
## [1] 0.0000000 0.3037374 0.0000000 0.6116693 0.2837157
##  (between_SS / total_SS =  97.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Cluster centres as a data frame
a <- as.data.frame(km.res[["centers"]])

length(km.res[["cluster"]]) # check the number of observations
## [1] 11
nrow(Shopping_mall_test)
## [1] 11
# Attach each store's cluster number to the per-store table as a factor
Shopping_mall_test[["clusterk"]] <- as.factor(km.res[["cluster"]])
kable(head(Shopping_mall_test))
Store.ID sumTraffic sumInteraction sumTransaction meanBasetSize clusterk
108 1360 1215 402 2.098882 2
220 1147 986 0 0.000000 3
228 837 691 110 1.968690 2
38 14704 11794 4298 2.901002 1
79 3637 2953 627 2.426727 4
83 9406 8389 2405 2.641927 5