#---- Convert the Date column (text such as "<Month name> <day>, <year>") to POSIXct
Saledata$Date <- as.POSIXct(Saledata$Date, format = "%B %d, %Y")
#---- Add day of week as a factor ordered Monday -> Sunday
# weekdays() is applied to the POSIXct value directly. Going through
# as.Date(<POSIXct>) converts in UTC by default, which moves a local-midnight
# timestamp to the previous calendar day in time zones ahead of UTC and so
# yields the wrong weekday.
# NOTE(review): "%B" parsing and weekdays() both use the session locale; the
# English level names below assume an English LC_TIME - confirm.
Saledata$Day <- factor(
  weekdays(Saledata$Date),
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)
# Sort rows by weekday (factor order, Monday first)
Saledata <- Saledata[order(Saledata$Day), ]
#--- Check min date - max date of each unique value
library(dplyr)
# df1: every row of Saledata plus the first (minDia) and last (maxDia) Date
# observed for its Store.ID.
df1 <- Saledata %>%
  group_by(Store.ID) %>%
  mutate(
    minDia = min(Date, na.rm = TRUE),
    maxDia = max(Date, na.rm = TRUE)
  )
# df2: one row per store with its date range. df1 already carries
# minDia/maxDia, so they are not recomputed here.
df2 <- df1 %>%
  ungroup() %>%
  distinct(Store.ID, minDia, maxDia)
# Number of rows per observation period (recorded output below)
table(Saledata[["Period"]])
##
## June 05 - June 14 June 19 - June 28 May 13 - May 17 May 20 - May 24
## 2709 3351 1386 1491

# Store IDs are labels, not quantities
Saledata[["Store.ID"]] <- as.character(Saledata[["Store.ID"]])

# Peak time by hour of day: totals per store type and hour
WHEN_PeakTime <- Saledata %>%
  group_by(Store.Type, Hour) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  )

# Peak time by day of week: totals per store type and weekday
Day_WHEN_PeakTime <- Saledata %>%
  group_by(Store.Type, Day) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  )

# Histogram
# Pearson correlation between transactions and average basket size;
# the recorded value below is about 0.26
cor(x = Saledata[["Transaction"]], y = Saledata[["Avg.Basket.Size"]])
## [1] 0.2636961
# Average basket size distribution, one subset per store type.
# Each statement is on its own line; the fused originals did not parse.
Saledata1 <- Saledata %>% filter(Store.Type == "SHOPPING_MALL")
hist(Saledata1$Avg.Basket.Size)

Saledata2 <- Saledata %>% filter(Store.Type == "INDEPENDENT")
hist(Saledata2$Avg.Basket.Size)

# Two panels side by side for the comparison histograms
par(
  mfrow = c(1, 2),
  mar = c(4, 4, 1, 0)
)
hist(Saledata1$Avg.Basket.Size, breaks = 30, xlim = c(0, 30), col = rgb(1, 0, 0, 0.5), xlab = "Avg.BasketSize", ylab = "Frequency", main = "")
hist(Saledata2$Avg.Basket.Size, breaks = 30, xlim = c(0, 30), col = rgb(0, 0, 1, 0.5), xlab = "Avg.BasketSize", ylab = "Frequency", main = "")

# Quartiles of basket size per store type (recorded output below each call)
quantile(Saledata1$Avg.Basket.Size, c(0.0, 0.25, 0.50, 0.75, 1.0))
## 0% 25% 50% 75% 100%
## 0.000000 2.000000 2.666667 3.650082 19.000000
quantile(Saledata2$Avg.Basket.Size, c(0.0, 0.25, 0.50, 0.75, 1.0))
## 0% 25% 50% 75% 100%
## 0 2 3 4 26
# 1. Peak time - Week day
# Weekday totals for shopping-mall stores, reshaped to long form and
# restricted to the traffic measure.
SHOPPING_MALL <- Day_WHEN_PeakTime %>% filter(Store.Type == "SHOPPING_MALL")
SHOPPING_MALL_long <- gather(SHOPPING_MALL, Condition, Measurement, sumTraffic:sumTransaction, factor_key = TRUE)
SHOPPING_MALL_long <- SHOPPING_MALL_long %>% filter(Condition == "sumTraffic")

# Stacked bar chart of traffic per weekday
ggplot(SHOPPING_MALL_long, aes(fill = Condition, y = Measurement, x = Day)) +
  geom_bar(position = "stack", stat = "identity") +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_color_manual(values = c("#0073C2FF")) +
  scale_fill_manual(values = c("#0073C2FF")) +
  geom_text(
    aes(label = Measurement),
    position = position_dodge(0.8),
    vjust = -0.3, size = 5.0
  )

# Line chart of the same data. The ggplot() call had been swallowed by the
# trailing "#stack bar" comment, leaving the layers with no plot to attach to.
ggplot(data = SHOPPING_MALL_long, aes(x = Day, y = Measurement, group = 1)) +
  geom_line() +
  geom_point() +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_color_manual(values = c("#0073C2FF")) +
  scale_fill_manual(values = c("#0073C2FF")) +
  geom_text(
    aes(label = Measurement),
    position = position_dodge(0.8),
    vjust = -0.3, size = 4.0
  )

# 2. Peak time - Hour in a Date
# Hourly totals for shopping-mall stores (wide) and their long form
SHOPPING_MALL <- WHEN_PeakTime %>% filter(Store.Type == "SHOPPING_MALL")
SHOPPING_MALL_long <- gather(SHOPPING_MALL, Condition, Measurement, sumTraffic:sumTransaction, factor_key = TRUE)

# Traffic by hour: line with a filled area.
# This plot and the next were fused on one line, which did not parse.
ggplot(data = SHOPPING_MALL, aes(x = Hour, y = sumTraffic)) +
  geom_line(color = "#00AFBB", size = 0.7) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_area(fill = "lightpink")

# Traffic, interaction and transaction by hour, one coloured line each
ggplot(SHOPPING_MALL_long, aes(x = Hour, y = Measurement)) +
  geom_line(aes(color = Condition), size = 1.0) +
  scale_color_manual(values = c("#EFC000FF", "#6A00A8FF", "#B12A90FF")) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_text(aes(label = Measurement), position = position_dodge(0.8), vjust = -0.3, size = 2.0)

# 3. Conversion rate according to time
# Share of traffic with no interaction, formatted as a whole-percent string
SHOPPING_MALL <- SHOPPING_MALL %>%
  mutate(noInteraction_Rate = paste0(round(((sumTraffic - sumInteraction) / sumTraffic) * 100, 0), "%"))
# Transactions as a percentage of traffic and of interactions (2 decimals)
SHOPPING_MALL_Performance <- SHOPPING_MALL %>%
  mutate(Traffic_conRate = round((sumTransaction / sumTraffic) * 100, 2)) %>%
  mutate(Interaction_conRate = round((sumTransaction / sumInteraction) * 100, 2))
SHOPPING_MALL_Performance
## # A tibble: 15 x 8
## # Groups: Store.Type [1]
## Store.Type Hour sumTraffic sumInteraction sumTransaction noInteraction_R…
## <fct> <int> <int> <int> <int> <chr>
## 1 SHOPPING_… 8 27 18 3 33%
## 2 SHOPPING_… 9 437 357 114 18%
## 3 SHOPPING_… 10 3372 2855 925 15%
## 4 SHOPPING_… 11 3912 3335 1084 15%
## 5 SHOPPING_… 12 3495 2994 956 14%
## 6 SHOPPING_… 13 3715 3171 967 15%
## 7 SHOPPING_… 14 3852 3308 1000 14%
## 8 SHOPPING_… 15 3940 3417 1006 13%
## 9 SHOPPING_… 16 3809 3286 996 14%
## 10 SHOPPING_… 17 3865 3243 1011 16%
## 11 SHOPPING_… 18 4884 4096 1250 16%
## 12 SHOPPING_… 19 7532 6395 1974 15%
## 13 SHOPPING_… 20 9217 7842 2316 15%
## 14 SHOPPING_… 21 4665 4015 1189 14%
## 15 SHOPPING_… 22 45 44 22 2%
## # … with 2 more variables: Traffic_conRate <dbl>, Interaction_conRate <dbl>
# 4. Funnel Traffic
# Funnel input for shopping malls: overall totals of the three stages
SHOPPING_MALL_F <- SHOPPING_MALL %>%
  group_by(Store.Type) %>%
  summarise(
    Traffic = sum(sumTraffic),
    Interaction = sum(sumInteraction),
    Convert = sum(sumTransaction)
  )
# The store type is constant here, so drop it before reshaping
SHOPPING_MALL_F[["Store.Type"]] <- NULL
# Wide -> long: one row per stage, Condition kept as a factor in column order
SHOPPING_MALL_F <- gather(
  SHOPPING_MALL_F, Condition, Measurement, Traffic:Convert,
  factor_key = TRUE
)
library(plotly)##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Funnel chart of the shopping-mall stages, built as a single pipeline:
# empty plot -> funnel trace -> y-axis category order taken from the data.
fig <- plot_ly() %>%
  add_trace(
    type = "funnel",
    y = SHOPPING_MALL_F$Condition,
    x = SHOPPING_MALL_F$Measurement,
    textposition = "inside",
    textinfo = "value+percent initial",
    opacity = 0.65,
    marker = list(
      color = c("deepskyblue", "lightsalmon", "tan", "teal", "silver"),
      line = list(
        width = c(4, 2, 2, 3, 1, 1),
        color = c("wheat", "wheat", "blue", "wheat", "wheat")
      )
    ),
    connector = list(line = list(color = "royalblue", dash = "dot", width = 3))
  ) %>%
  layout(yaxis = list(categoryarray = SHOPPING_MALL_F$Condition))
fig
# 1. Peak time - when - Day
# Weekday totals for independent stores, reshaped to long form and
# restricted to the traffic measure.
INDEPENDENT <- Day_WHEN_PeakTime %>% filter(Store.Type == "INDEPENDENT")
INDEPENDENT_long <- gather(INDEPENDENT, Condition, Measurement, sumTraffic:sumTransaction, factor_key = TRUE)
INDEPENDENT_long <- INDEPENDENT_long %>% filter(Condition == "sumTraffic")

# Stacked bar chart of traffic per weekday
ggplot(INDEPENDENT_long, aes(fill = Condition, y = Measurement, x = Day)) +
  geom_bar(position = "stack", stat = "identity") +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_color_manual(values = c("#EFC000FF")) +
  scale_fill_manual(values = c("#EFC000FF")) +
  geom_text(
    aes(label = Measurement),
    position = position_dodge(0.8),
    vjust = -0.3, size = 3.5
  )

# Line chart of the same data. The ggplot() call had been swallowed by the
# trailing "#stack bar" comment, leaving the layers with no plot to attach to.
ggplot(data = INDEPENDENT_long, aes(x = Day, y = Measurement, group = 1)) +
  geom_line() +
  geom_point() +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_color_manual(values = c("#0073C2FF")) +
  scale_fill_manual(values = c("#0073C2FF")) +
  geom_text(
    aes(label = Measurement),
    position = position_dodge(0.8),
    vjust = -0.3, size = 4.0
  )

# 2. Peak time - when - Hour
# Hourly totals for independent stores (wide) and their long form
INDEPENDENT <- WHEN_PeakTime %>% filter(Store.Type == "INDEPENDENT")
INDEPENDENT_long <- gather(INDEPENDENT, Condition, Measurement, sumTraffic:sumTransaction, factor_key = TRUE)

# Traffic by hour: line with a filled area.
# This plot and the next were fused on one line, which did not parse.
ggplot(data = INDEPENDENT, aes(x = Hour, y = sumTraffic)) +
  geom_line(color = "#00AFBB", size = 0.3) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  geom_area(fill = "lightblue")

# Traffic, interaction and transaction by hour, one coloured line each
ggplot(INDEPENDENT_long, aes(x = Hour, y = Measurement)) +
  geom_line(aes(color = Condition), size = 1.0) +
  scale_color_manual(values = c("#b9005f", "#5fb900", "#005fb9")) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# 3. Conversion rate according to time
# Share of traffic with no interaction, formatted as a whole-percent string
INDEPENDENT <- INDEPENDENT %>%
  mutate(noInteraction_Rate = paste0(round(((sumTraffic - sumInteraction) / sumTraffic) * 100, 0), "%"))
# Transactions as a percentage of traffic and of interactions (2 decimals)
INDEPENDENT_Performance <- INDEPENDENT %>%
  mutate(Traffic_conRate = round((sumTransaction / sumTraffic) * 100, 2)) %>%
  mutate(Interaction_conRate = round((sumTransaction / sumInteraction) * 100, 2))
INDEPENDENT_Performance
## # A tibble: 15 x 8
## # Groups: Store.Type [1]
## Store.Type Hour sumTraffic sumInteraction sumTransaction noInteraction_R…
## <fct> <int> <int> <int> <int> <chr>
## 1 INDEPENDE… 8 188 148 47 21%
## 2 INDEPENDE… 9 1665 1457 603 12%
## 3 INDEPENDE… 10 2495 2274 968 9%
## 4 INDEPENDE… 11 2453 2225 966 9%
## 5 INDEPENDE… 12 1942 1785 806 8%
## 6 INDEPENDE… 13 1660 1518 675 9%
## 7 INDEPENDE… 14 1911 1746 683 9%
## 8 INDEPENDE… 15 2447 2260 915 8%
## 9 INDEPENDE… 16 2993 2780 1206 7%
## 10 INDEPENDE… 17 3949 3605 1661 9%
## 11 INDEPENDE… 18 4479 4153 1972 7%
## 12 INDEPENDE… 19 5954 5512 2453 7%
## 13 INDEPENDE… 20 6874 6342 2654 8%
## 14 INDEPENDE… 21 4078 3694 1468 9%
## 15 INDEPENDE… 22 77 72 35 6%
## # … with 2 more variables: Traffic_conRate <dbl>, Interaction_conRate <dbl>
# 4. Funnel Traffic
# Funnel input for independent stores: overall totals of the three stages
INDEPENDENT_F <- INDEPENDENT %>%
  group_by(Store.Type) %>%
  summarise(
    Traffic = sum(sumTraffic),
    Interaction = sum(sumInteraction),
    Convert = sum(sumTransaction)
  )
# The store type is constant here, so drop it before reshaping
INDEPENDENT_F[["Store.Type"]] <- NULL
# Wide -> long: one row per stage, Condition kept as a factor in column order
INDEPENDENT_F <- gather(
  INDEPENDENT_F, Condition, Measurement, Traffic:Convert,
  factor_key = TRUE
)
library(plotly)
# Funnel chart of the independent-store stages, built as a single pipeline:
# empty plot -> funnel trace -> y-axis category order taken from the data.
fig <- plot_ly() %>%
  add_trace(
    type = "funnel",
    y = INDEPENDENT_F$Condition,
    x = INDEPENDENT_F$Measurement,
    textposition = "inside",
    textinfo = "value+percent initial",
    opacity = 0.65,
    marker = list(
      color = c("deepskyblue", "lightsalmon", "tan", "teal", "silver"),
      line = list(
        width = c(4, 2, 2, 3, 1, 1),
        color = c("wheat", "wheat", "blue", "wheat", "wheat")
      )
    ),
    connector = list(line = list(color = "royalblue", dash = "dot", width = 3))
  ) %>%
  layout(yaxis = list(categoryarray = INDEPENDENT_F$Condition))
fig
# 1. Funnel
library(plotly)
# Combined funnel: shopping-mall stages as the base trace, independent
# stores added as a second horizontal trace, stages in a fixed order.
fig <- plot_ly(
  type = "funnel",
  name = 'Shopping_Mall',
  y = SHOPPING_MALL_F$Condition,
  x = SHOPPING_MALL_F$Measurement,
  textinfo = "value+percent initial"
) %>%
  add_trace(
    type = "funnel",
    name = 'Independent',
    orientation = "h",
    y = INDEPENDENT_F$Condition,
    x = INDEPENDENT_F$Measurement,
    textposition = "inside",
    textinfo = "value+percent previous"
  ) %>%
  layout(yaxis = list(categoryarray = c("Traffic", "Interaction", "Convert")))
fig
# 2. Interaction
# Interaction Rate by Day in Week
# Interaction rate (interactions / traffic, in percent) per store type and weekday.
# Interaction_conRate_num holds the numeric rate used for the y axis;
# Interaction_conRate keeps the "xx.x %" text and is used only as a label.
# Mapping the text column to y gave a discrete axis sorted alphabetically.
Saledata_1 <- Saledata %>%
  group_by(Store.Type, Day) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  ) %>%
  mutate(
    Interaction_conRate_num = round((sumInteraction / sumTraffic) * 100, 1),
    Interaction_conRate = paste(Interaction_conRate_num, '%')
  )
Saledata_1$sumTraffic <- NULL
Saledata_1$sumInteraction <- NULL
Saledata_1$sumTransaction <- NULL

# theme_bw() is a complete theme and resets earlier theme() settings, so it
# must come before legend.position for the legend to appear at the top.
ggplot(Saledata_1, aes(x = Day, y = Interaction_conRate_num, group = Store.Type)) +
  geom_line(aes(linetype = Store.Type, color = Store.Type)) +
  geom_point(aes(color = Store.Type)) +
  geom_text(
    aes(label = Interaction_conRate),
    position = position_dodge(0.8),
    vjust = -0.3, size = 3.5
  ) +
  theme_bw() +
  theme(
    legend.position = "top",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Interaction Rate by Hour in Day
# Interaction rate (interactions / traffic, in percent) per store type and hour.
# Interaction_conRate_num holds the numeric rate used for the y axis;
# Interaction_conRate keeps the "xx.x %" text and is used only as a label.
# Mapping the text column to y gave a discrete axis sorted alphabetically.
Saledata_1 <- Saledata %>%
  group_by(Store.Type, Hour) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  ) %>%
  mutate(
    Interaction_conRate_num = round((sumInteraction / sumTraffic) * 100, 1),
    Interaction_conRate = paste(Interaction_conRate_num, '%')
  )
Saledata_1$sumTraffic <- NULL
Saledata_1$sumInteraction <- NULL
Saledata_1$sumTransaction <- NULL

# theme_bw() is a complete theme and resets earlier theme() settings, so it
# must come before legend.position for the legend to appear at the top.
ggplot(Saledata_1, aes(x = Hour, y = Interaction_conRate_num, group = Store.Type)) +
  geom_line(aes(linetype = Store.Type, color = Store.Type)) +
  geom_point(aes(color = Store.Type)) +
  geom_text(
    aes(label = Interaction_conRate),
    position = position_dodge(0.8),
    vjust = -0.3, size = 3.5
  ) +
  theme_bw() +
  theme(
    legend.position = "top",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# 3. Transaction
# Transaction Rate by Day in Week
# Transaction rate (transactions / interactions, in percent) per store type and weekday.
# Transaction_conRate_num holds the numeric rate used for the y axis;
# Transaction_conRate keeps the "xx.x %" text and is used only as a label.
# Mapping the text column to y gave a discrete axis sorted alphabetically.
Saledata_1 <- Saledata %>%
  group_by(Store.Type, Day) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  ) %>%
  mutate(
    Transaction_conRate_num = round((sumTransaction / sumInteraction) * 100, 1),
    Transaction_conRate = paste(Transaction_conRate_num, '%')
  )
Saledata_1$sumTraffic <- NULL
Saledata_1$sumInteraction <- NULL
Saledata_1$sumTransaction <- NULL

# theme_bw() is a complete theme and resets earlier theme() settings, so it
# must come before legend.position for the legend to appear at the top.
ggplot(Saledata_1, aes(x = Day, y = Transaction_conRate_num, group = Store.Type)) +
  geom_line(aes(linetype = Store.Type, color = Store.Type)) +
  geom_point(aes(color = Store.Type)) +
  geom_text(
    aes(label = Transaction_conRate),
    position = position_dodge(0.8),
    vjust = -0.3, size = 3.5
  ) +
  theme_bw() +
  theme(
    legend.position = "top",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Transaction Rate by Hour in Day
# Transaction rate (transactions / interactions, in percent) per store type and hour.
# Transaction_conRate_num holds the numeric rate used for the y axis;
# Transaction_conRate keeps the "xx.xx %" text and is used only as a label.
# Mapping the text column to y gave a discrete axis sorted alphabetically.
Saledata_1 <- Saledata %>%
  group_by(Store.Type, Hour) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  ) %>%
  mutate(
    Transaction_conRate_num = round((sumTransaction / sumInteraction) * 100, 2),
    Transaction_conRate = paste(Transaction_conRate_num, '%')
  )
Saledata_1$sumTraffic <- NULL
Saledata_1$sumInteraction <- NULL
Saledata_1$sumTransaction <- NULL

# theme_bw() is a complete theme and resets earlier theme() settings, so it
# must come before legend.position for the legend to appear at the top.
ggplot(Saledata_1, aes(x = Hour, y = Transaction_conRate_num, group = Store.Type)) +
  geom_line(aes(linetype = Store.Type, color = Store.Type)) +
  geom_point(aes(color = Store.Type)) +
  geom_text(
    aes(label = Transaction_conRate),
    position = position_dodge(0.8),
    vjust = -0.3, size = 3.5
  ) +
  theme_bw() +
  theme(
    legend.position = "top",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# 4 Performance over period
# Store.Type
# Totals per store type and period
Saledata_2 <- Saledata %>%
  group_by(Store.Type, Period) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  )
# Reshape: melt to long form, then cast to one row per store type with a
# column for every period/measure combination
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
meltSaledata2 <- melt(Saledata_2, id.vars = 1:2)
u <- dcast(
  meltSaledata2,
  Store.Type ~ Period + variable,
  fun.aggregate = sum
)
# Export the wide table to the working directory
library(writexl)
write_xlsx(x = u, path = "u.xlsx", col_names = TRUE)
## [1] "/Users/admin/Documents/Linh-R Studio/SaleData/u.xlsx"
#Store.ID
# Totals per store and period, independent stores only
Saledata_3 <- Saledata %>%
  filter(Store.Type == "INDEPENDENT") %>%
  group_by(Store.ID, Period) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction)
  )
# Reshape: melt to long form, then cast to one row per store with a column
# for every period/measure combination
meltSaledata3 <- melt(Saledata_3, id.vars = 1:2)
z <- dcast(
  meltSaledata3,
  Store.ID ~ Period + variable,
  fun.aggregate = sum
)
# Export the wide table to the working directory
library(writexl)
write_xlsx(x = z, path = "z2.xlsx", col_names = TRUE)
## [1] "/Users/admin/Documents/Linh-R Studio/SaleData/z2.xlsx"
# Shopping-mall rows only
Shopping_mall <- Saledata %>% filter(Store.Type == "SHOPPING_MALL")
library(dplyr)
# df1: every mall row plus the first (minDia) and last (maxDia) Date of its store
df1 <- Shopping_mall %>%
  group_by(Store.ID) %>%
  mutate(
    minDia = min(Date, na.rm = TRUE),
    maxDia = max(Date, na.rm = TRUE)
  )
# df2: one row per store with its date range; df1 already carries the
# columns, so they are not recomputed.
df2 <- df1 %>%
  ungroup() %>%
  distinct(Store.ID, minDia, maxDia)

# Total traffic per store. This statement was fused onto the end of the
# distinct() call, which did not parse.
Traffic <- Shopping_mall %>% group_by(Store.ID) %>% summarise(TotalTraffic = sum(Traffic))
library(treemap)
# Treemap: tile area and colour both driven by total traffic
treemap(Traffic,
  index = c("Store.ID"), # single index
  vSize = "TotalTraffic",
  vColor = "TotalTraffic",
  palette = "Purples",
  type = "value"
)
# Line chart by date: total traffic per store and date
Traffic <- Shopping_mall %>% group_by(Store.ID, Date) %>% summarise(TotalTraffic = sum(Traffic))
library("gridExtra")##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Theme shared by the four period charts: black-and-white base with no panel
# border or grid lines, black axis lines.
minimal_axes_theme <- theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Daily traffic per store, one chart per date window
t1 <- ggplot(data = Traffic[which(Traffic$Date <= "2020-05-17"), ], aes(x = Date, y = TotalTraffic)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme
t2 <- ggplot(data = Traffic[which(Traffic$Date <= "2020-05-24" & Traffic$Date >= "2020-05-20"), ], aes(x = Date, y = TotalTraffic)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme
t3 <- ggplot(data = Traffic[which(Traffic$Date <= "2020-06-14" & Traffic$Date >= "2020-06-05"), ], aes(x = Date, y = TotalTraffic)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme
t4 <- ggplot(data = Traffic[which(Traffic$Date <= "2020-06-28" & Traffic$Date >= "2020-06-19"), ], aes(x = Date, y = TotalTraffic)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme

# The two grid.arrange() calls and the assignment below were fused on one
# line, which did not parse.
grid.arrange(t1, t2)
grid.arrange(t3, t4)
Traffic <- Shopping_mall %>% group_by(Store.ID, Date) %>% summarise(TotalTraffic = sum(Traffic))
# Demo for "2020-05-13" to "2020-05-17"
table(Saledata$Period)
##
## June 05 - June 14 June 19 - June 28 May 13 - May 17 May 20 - May 24
## 2709 3351 1386 1491
# Build a ggplot2 theme: theme_bw() at base size 15 with a white background,
# no horizontal grid lines, no axis ticks, no legend, and bold titles.
# Takes no arguments and returns the theme object to add to a plot.
my_theme <- function() {
  bg <- "white"
  ink <- "#22211d"

  theme_bw(base_size = 15) +
    theme(
      # Background colours
      panel.background = element_rect(fill = bg, color = bg),
      plot.background = element_rect(fill = bg, color = bg),
      panel.border = element_rect(color = bg),
      strip.background = element_rect(fill = bg, color = bg),
      # Grid and ticks
      panel.grid.major.y = element_blank(),
      panel.grid.minor.y = element_blank(),
      axis.ticks = element_blank(),
      # Legend
      legend.position = "none",
      # Title and axis labels
      plot.title = element_text(color = ink, size = 20, face = "bold"),
      axis.title.x = element_text(size = 14, color = "black", face = "bold"),
      axis.title.y = element_text(size = 14, color = "black", face = "bold", vjust = 1.25),
      axis.text.x = element_text(size = 10, vjust = 0.5, hjust = 0.5, color = ink),
      axis.text.y = element_text(size = 10, color = ink),
      strip.text = element_text(face = "bold"),
      # Plot margins
      plot.margin = unit(c(0.35, 0.2, 0.3, 0.35), "cm")
    )
}
# Daily traffic ranking of mall stores for dates up to 2020-05-17
Traffic1 <- subset(Traffic, Date <= "2020-05-17")
# day = position of each Date among the distinct dates in Traffic1.
# NOTE(review): transform() is the base R function, so the preceding
# group_by() presumably has no effect on this step - confirm.
Transformed <- Traffic1 %>% group_by(Store.ID) %>% transform(., day = match(Date, unique(Date)))
# Within each date, rank stores by traffic (highest first, Store.ID breaks ties)
df.rankings <- Transformed %>%
  group_by(Date) %>%
  arrange(Date, desc(TotalTraffic), Store.ID) %>%
  mutate(ranking = row_number()) %>%
  as.data.frame()
df.rankings$Store.ID <- as.character(df.rankings$Store.ID)

# Bump chart: rank 1 at the top thanks to the reversed y axis
ggplot(data = df.rankings, aes(x = day, y = ranking, group = Store.ID)) +
  geom_line(aes(color = Store.ID, alpha = 1), size = 1) +
  geom_point(aes(color = Store.ID, alpha = 1), size = 3) +
  scale_y_reverse(breaks = seq_len(nrow(df.rankings))) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# This assignment was fused onto the end of the plot, which did not parse.
Shopping_mall <- Saledata %>% filter(Store.Type == "SHOPPING_MALL")
# Per mall store: total interactions, total traffic and the interaction rate
# (interactions as a percentage of traffic, 2 decimals)
Mall_Interaction <- Shopping_mall %>%
  group_by(Store.ID) %>%
  summarise(
    Total_Interaction = sum(Interaction),
    TotalTraffic = sum(Traffic)
  ) %>%
  mutate(
    Interaction_conRate = round((Total_Interaction / TotalTraffic) * 100, 2)
  )
Mall_Interaction
## # A tibble: 11 x 4
## Store.ID Total_Interaction TotalTraffic Interaction_conRate
## <chr> <int> <int> <dbl>
## 1 108 1215 1360 89.3
## 2 220 986 1147 86.0
## 3 228 691 837 82.6
## 4 38 11794 14704 80.2
## 5 79 2953 3637 81.2
## 6 83 8389 9406 89.2
## 7 84 9818 11488 85.5
## 8 90 3549 4162 85.3
## 9 95 1498 1591 94.2
## 10 96 4070 4610 88.3
## 11 97 3413 3825 89.2
library(treemap)
# Treemap: tile area and colour both driven by the interaction rate
treemap(Mall_Interaction,
  index = c("Store.ID"), # single index
  vSize = "Interaction_conRate",
  vColor = "Interaction_conRate",
  palette = "Set3",
  type = "value"
)
# Interaction line graph by date: rate per store and date
Mall_Interaction <- Shopping_mall %>%
  group_by(Store.ID, Date) %>%
  summarise(Total_Interaction = sum(Interaction), TotalTraffic = sum(Traffic)) %>%
  mutate(Interaction_conRate = round((Total_Interaction / TotalTraffic) * 100, 2))

# Theme shared by the four period charts: black-and-white base with no panel
# border or grid lines, black axis lines.
minimal_axes_theme <- theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Daily interaction rate per store, one chart per date window
a1 <- ggplot(data = Mall_Interaction[which(Mall_Interaction$Date <= "2020-05-17"), ], aes(x = Date, y = Interaction_conRate)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme
a2 <- ggplot(data = Mall_Interaction[which(Mall_Interaction$Date <= "2020-05-24" & Mall_Interaction$Date >= "2020-05-20"), ], aes(x = Date, y = Interaction_conRate)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme
a3 <- ggplot(data = Mall_Interaction[which(Mall_Interaction$Date <= "2020-06-14" & Mall_Interaction$Date >= "2020-06-05"), ], aes(x = Date, y = Interaction_conRate)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme
a4 <- ggplot(data = Mall_Interaction[which(Mall_Interaction$Date <= "2020-06-28" & Mall_Interaction$Date >= "2020-06-19"), ], aes(x = Date, y = Interaction_conRate)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme

# The two grid.arrange() calls were fused on one line, which did not parse.
grid.arrange(a1, a2)
grid.arrange(a3, a4)
# Demo Date <= "2020-05-17"
# Daily interaction-rate ranking of mall stores for dates up to 2020-05-17
Mall_Interaction1 <- subset(Mall_Interaction, Date <= "2020-05-17")
# day = position of each Date among the distinct dates in the subset.
# NOTE(review): transform() is the base R function, so the preceding
# group_by() presumably has no effect on this step - confirm.
Transformed <- Mall_Interaction1 %>% group_by(Store.ID) %>% transform(., day = match(Date, unique(Date)))
# Within each date, rank stores by rate (highest first, Store.ID breaks ties)
df.rankings <- Transformed %>%
  group_by(Date) %>%
  arrange(Date, desc(Interaction_conRate), Store.ID) %>%
  mutate(ranking = row_number()) %>%
  as.data.frame()
df.rankings$Store.ID <- as.character(df.rankings$Store.ID)

# Bump chart: rank 1 at the top thanks to the reversed y axis
ggplot(data = df.rankings, aes(x = day, y = ranking, group = Store.ID)) +
  geom_line(aes(color = Store.ID, alpha = 1), size = 1.2) +
  geom_point(aes(color = Store.ID, alpha = 1), size = 1.5) +
  scale_y_reverse(breaks = seq_len(nrow(df.rankings)))

# This assignment was fused onto the end of the plot, which did not parse.
Shopping_mall <- Saledata %>% filter(Store.Type == "SHOPPING_MALL")
# Per mall store: total transactions, total interactions and the transaction
# rate (transactions as a percentage of interactions, 2 decimals)
Mall_Transaction <- Shopping_mall %>%
  group_by(Store.ID) %>%
  summarise(
    Total_Transaction = sum(Transaction),
    Total_Interaction = sum(Interaction)
  ) %>%
  mutate(
    Transaction_conRate = round((Total_Transaction / Total_Interaction) * 100, 2)
  )
library(treemap)
# Treemap: tile area and colour both driven by the transaction rate.
# Author's note: with type = "value" the plot gets a scale bar.
treemap(
  Mall_Transaction,
  index = c("Store.ID"), # single index
  vSize = "Transaction_conRate",
  vColor = "Transaction_conRate",
  palette = "RdBu",
  type = "value"
)
### By date
# Transaction rate per mall store and date
Mall_Transaction <- Shopping_mall %>%
  group_by(Store.ID, Date) %>%
  summarise(Total_Transaction = sum(Transaction), Total_Interaction = sum(Interaction)) %>%
  mutate(Transaction_conRate = round((Total_Transaction / Total_Interaction) * 100, 2))

# Theme shared by the four period charts: black-and-white base with no panel
# border or grid lines, black axis lines.
minimal_axes_theme <- theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Daily transaction rate per store, one chart per date window
c1 <- ggplot(data = Mall_Transaction[which(Mall_Transaction$Date <= "2020-05-17"), ], aes(x = Date, y = Transaction_conRate)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme
c2 <- ggplot(data = Mall_Transaction[which(Mall_Transaction$Date <= "2020-05-24" & Mall_Transaction$Date >= "2020-05-20"), ], aes(x = Date, y = Transaction_conRate)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme
c3 <- ggplot(data = Mall_Transaction[which(Mall_Transaction$Date <= "2020-06-14" & Mall_Transaction$Date >= "2020-06-05"), ], aes(x = Date, y = Transaction_conRate)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme
c4 <- ggplot(data = Mall_Transaction[which(Mall_Transaction$Date <= "2020-06-28" & Mall_Transaction$Date >= "2020-06-19"), ], aes(x = Date, y = Transaction_conRate)) +
  geom_line(aes(color = as.factor(Store.ID)), size = 0.7) +
  minimal_axes_theme

# The two grid.arrange() calls were fused on one line, which did not parse.
grid.arrange(c1, c2)
grid.arrange(c3, c4)
# Demo Date <= "2020-06-14" & Date >= "2020-06-05"
# Daily transaction-rate ranking of mall stores, 2020-06-05 to 2020-06-14
Mall_Transaction1 <- subset(Mall_Transaction, Date <= "2020-06-14" & Date >= "2020-06-05")
# day = position of each Date among the distinct dates in the subset.
# NOTE(review): transform() is the base R function, so the preceding
# group_by() presumably has no effect on this step - confirm.
Transformed <- Mall_Transaction1 %>% group_by(Store.ID) %>% transform(., day = match(Date, unique(Date)))
# Within each date, rank stores by rate (highest first, Store.ID breaks ties)
df.rankings <- Transformed %>%
  group_by(Date) %>%
  arrange(Date, desc(Transaction_conRate), Store.ID) %>%
  mutate(ranking = row_number()) %>%
  as.data.frame()
df.rankings$Store.ID <- as.character(df.rankings$Store.ID)

# Bump chart plotted against the actual Date; rank 1 at the top thanks to
# the reversed y axis
ggplot(data = df.rankings, aes(x = Date, y = ranking, group = Store.ID)) +
  geom_line(aes(color = Store.ID, alpha = 1), size = 1.2) +
  geom_point(aes(color = Store.ID, alpha = 1), size = 1.5) +
  scale_y_reverse(breaks = seq_len(nrow(df.rankings)))

# This assignment was fused onto the end of the plot, which did not parse.
Shopping_mall <- Saledata %>% filter(Store.Type == "SHOPPING_MALL")
library(tidyverse)
library(magrittr)##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
# One row per mall store: total traffic, interactions, transactions and the
# mean of Avg.Basket.Size (the column name "meanBasetSize" is kept as-is
# because the recorded outputs refer to it).
Shopping_mall_test <- Shopping_mall %>%
  group_by(Store.ID) %>%
  summarise(
    sumTraffic = sum(Traffic),
    sumInteraction = sum(Interaction),
    sumTransaction = sum(Transaction),
    meanBasetSize = mean(Avg.Basket.Size)
  )
Shopping_mall_test$Store.ID <- as.character(Shopping_mall_test$Store.ID)
# library() stops with an error if knitr is missing; require() only returns FALSE
library(knitr)
## Loading required package: knitr
# Recorded output is kept as comments; the pasted markdown table did not parse as R.
kable(head(Shopping_mall_test))
## | Store.ID | sumTraffic | sumInteraction | sumTransaction | meanBasetSize |
## |---|---|---|---|---|
## | 108 | 1360 | 1215 | 402 | 2.098882 |
## | 220 | 1147 | 986 | 0 | 0.000000 |
## | 228 | 837 | 691 | 110 | 1.968690 |
## | 38 | 14704 | 11794 | 4298 | 2.901002 |
## | 79 | 3637 | 2953 | 627 | 2.426727 |
## | 83 | 9406 | 8389 | 2405 | 2.641927 |
# Column summaries before removing incomplete rows (recorded output below)
Shopping_mall_test %>% summary()
## Store.ID sumTraffic sumInteraction sumTransaction
## Length:11 Min. : 837 Min. : 691 Min. : 0.0
## Class :character 1st Qu.: 1476 1st Qu.: 1356 1st Qu.: 409.5
## Mode :character Median : 3825 Median : 3413 Median :1000.0
## Mean : 5161 Mean : 4398 Mean :1346.6
## 3rd Qu.: 7008 3rd Qu.: 6230 3rd Qu.:2071.0
## Max. :14704 Max. :11794 Max. :4298.0
## meanBasetSize
## Min. :0.000
## 1st Qu.:2.263
## Median :2.642
## Mean :2.371
## 3rd Qu.:2.864
## Max. :2.984
# Drop rows containing NA, then summarise again; both recorded summaries
# report Length:11
Shopping_mall_test <- Shopping_mall_test %>% na.omit()
Shopping_mall_test %>% summary()
## Store.ID sumTraffic sumInteraction sumTransaction
## Length:11 Min. : 837 Min. : 691 Min. : 0.0
## Class :character 1st Qu.: 1476 1st Qu.: 1356 1st Qu.: 409.5
## Mode :character Median : 3825 Median : 3413 Median :1000.0
## Mean : 5161 Mean : 4398 Mean :1346.6
## 3rd Qu.: 7008 3rd Qu.: 6230 3rd Qu.:2071.0
## Max. :14704 Max. :11794 Max. :4298.0
## meanBasetSize
## Min. :0.000
## 1st Qu.:2.263
## Median :2.642
## Mean :2.371
## 3rd Qu.:2.864
## Max. :2.984
# Standardise the four numeric store metrics (columns 2-5) to mean 0 and
# sd 1 for clustering; scale() returns a numeric matrix.
# The earlier `row.names(dsData) <- ...` step is removed: setting row names
# on a tibble is deprecated (it raised the recorded warning) and tibbles drop
# row names on `[`, so the labels never reached the scaled matrix.
# If labelled rows are wanted, set rownames() on the matrix after scale().
dsData <- scale(Shopping_mall_test[, 2:5])
summary(dsData)
## sumTraffic sumInteraction sumTransaction meanBasetSize
## Min. :-0.9292 Min. :-0.9641 Min. :-1.0098 Min. :-2.7770
## 1st Qu.:-0.7919 1st Qu.:-0.7910 1st Qu.:-0.7027 1st Qu.:-0.1267
## Median :-0.2870 Median :-0.2561 Median :-0.2599 Median : 0.3173
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.3970 3rd Qu.: 0.4764 3rd Qu.: 0.5432 3rd Qu.: 0.5776
## Max. : 2.0509 Max. : 1.9237 Max. : 2.2131 Max. : 0.7176
# Fix the RNG seed so the random starts below are reproducible
set.seed(123)
# k-means on the standardised metrics: 5 clusters, 25 random starts
km.res <- kmeans(x = dsData, centers = 5, nstart = 25)
km.res
## K-means clustering with 5 clusters of sizes 1, 3, 1, 4, 2
##
## Cluster means:
## sumTraffic sumInteraction sumTransaction meanBasetSize
## 1 2.0508877 1.9237355 2.2130817 0.6207611
## 2 -0.8376814 -0.8487407 -0.7775723 -0.2026117
## 3 -0.8625384 -0.8874087 -1.0097760 -2.7769747
## 4 -0.2368513 -0.2344965 -0.1800664 0.4322821
## 5 1.1360500 1.2239406 0.9248384 0.5174601
##
## Clustering vector:
## [1] 2 3 2 1 4 5 5 4 2 4 4
##
## Within cluster sum of squares by cluster:
## [1] 0.0000000 0.3037374 0.0000000 0.6116693 0.2837157
## (between_SS / total_SS = 97.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Cluster centres as a data frame
a <- as.data.frame(km.res$centers)
# Check the number of observations: one cluster label per store row
length(km.res$cluster)
## [1] 11
nrow(Shopping_mall_test)
## [1] 11
# Attach the cluster label to each store
Shopping_mall_test$clusterk <- as.factor(km.res$cluster)
# Recorded output is kept as comments; the pasted markdown table did not parse as R.
kable(head(Shopping_mall_test))
## | Store.ID | sumTraffic | sumInteraction | sumTransaction | meanBasetSize | clusterk |
## |---|---|---|---|---|---|
## | 108 | 1360 | 1215 | 402 | 2.098882 | 2 |
## | 220 | 1147 | 986 | 0 | 0.000000 | 3 |
## | 228 | 837 | 691 | 110 | 1.968690 | 2 |
## | 38 | 14704 | 11794 | 4298 | 2.901002 | 1 |
## | 79 | 3637 | 2953 | 627 | 2.426727 | 4 |
## | 83 | 9406 | 8389 | 2405 | 2.641927 | 5 |