true
Clear the WorkSpace
rm(list = ls())
Pepare the Packages
packages <- c("ggplot2"
                , "lemon"
                , "gridExtra" # For Q1
                , "ggrepel"   # For labels in Q2.b
                , "scales"
              , "dplyr",
              "lubridate",
              "tidyr"
              
)
for (i in 1:length(packages)) {
  if (!packages[i] %in% rownames(installed.packages())) {
    install.packages(packages[i]
                     , repos = "http://cran.rstudio.com/"
                     , dependencies = TRUE
    )
  }
  library(packages[i], character.only = TRUE)
}
## 
## 载入程辑包:'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## 载入程辑包:'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
Set Work Disctionary
setwd("D:/Study/Tools 4 Data Analysis/R Assignment 3")
Load the Data
sales <- read.csv("D:/Study/Tools 4 Data Analysis/R Assignment 3/r.hw03.sales.csv"
                                    , check.names = FALSE
                                    , stringsAsFactors = FALSE
                                    , na.strings = ""
                                    )

items <- read.csv("D:/Study/Tools 4 Data Analysis/R Assignment 3/r.hw03.items.csv"
                                    , check.names = FALSE
                                    , stringsAsFactors = FALSE
                                    , na.strings = ""
                                    )


# Merge data together
sales <- merge(sales, items, by="item.id", all.x=T, all.y=F)
  
# Reorder variables
var.order <- c("date"
                , "category"
                , "subcategory"
                , "item.name"
                , "volume"
                , "price"
                , "sale.bottles"
                , "sale.volume"
                , "sale.dollars"
                )

sales <- sales[var.order]

Q1

Prepare Data for Heatmap

sales$date  <- as.Date(sales$date)

dollar.sales.date <- aggregate(sale.dollars ~ date, sales, sum)

year2015 <- seq(from=as.Date("2015-01-01"), to=as.Date("2015-12-31"), by="day")

sales.dates <- data.frame(date = year2015)

daily.sales <- merge(sales.dates, dollar.sales.date, by="date", all.x=TRUE)

daily.sales$day <- as.numeric(format(daily.sales$date, "%d"))

daily.sales$weekday <- factor(weekdays(daily.sales$date, abbreviate = TRUE),
                              levels = c("周一", "周二", "周三", "周四", "周五", "周六", "周日"), 
                              ordered=TRUE
)

daily.sales$month <- factor(months(daily.sales$date, abbreviate = TRUE), 
                            levels = c("1月", "2月", "3月", "4月", "5月", "6月", "7月", "8月", "9月", "10月", "11月", "12月"),
                            ordered = TRUE
)
    
daily.sales$week <- as.numeric(format(daily.sales$date, "%W")) + 1

weeknum <- as.numeric(format(as.Date(cut(daily.sales$date, "month")), "%W"))

daily.sales$week <- daily.sales$week - weeknum
# 
# chinese_weekdays <- c("星期日", "星期一", "星期二", "星期三", "星期四", "星期五", "星期六")
# english_weekdays <- c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")
# 
# chinese_months <- c("一月", "二月", "三月", "四月", "五月", "六月", "七月", "八月", "九月", "十月", "十一月", "十二月")
# english_months <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
# 
# weekday_lookup <- setNames(english_weekdays, chinese_weekdays)
# month_lookup <- setNames(english_months, chinese_months)
# 
# daily_sales$weekday <- weekday_lookup[as.character(daily_sales$weekday)]
# daily_sales$month <- month_lookup[as.character(daily_sales$month)]
The graph will be so busted after I try to translate Chinese into English, So I just left them there. Still don’t know why its Chinese, I even change the config file in the root directory and windows registry…

Plot

heatmap <- ggplot(daily.sales, aes(x = weekday,
                                   y = week,
                                   fill = factor(cut(sale.dollars,
                           breaks = c(0, 7*10^5, 1*10^6, 1.3*10^6, 1.7*10^6, 2*10^6),
                           labels=c("0-1 min", "1-1.2 min", "1.2 - 1.4min", "1.4-1.6min", "1.6-2 min")
                      )))) +
  geom_tile(colour = "white") + 
  geom_text(aes(label = day)) + 
  facet_rep_wrap( ~ month 
                           , ncol = 3
                           , strip.position = "top"
                           , repeat.tick.labels = TRUE
                           ) +
  scale_y_reverse() +
  scale_x_discrete(limits = levels(daily.sales$weekday), position = "top")+
  theme(strip.background = element_blank(), strip.placement = "outside", axis.text.y = element_blank()
                        , legend.direction = "horizontal"
                        , legend.position = "bottom"
                        , legend.title = element_blank()) +
  labs(x = "", y = "", title = "Daily Total Sales, 2015")


heatmap

Q2

Q2a

Prepare Data for Graphs

sales$price.L <- sales$price / ((sales$volume)/1000.0)

sales$category <- factor(sales$category, 
                         levels=c("Amaretto", "Brandy", "Distilled Spirits", "Gin", "Rum","Schnapps", "Tequila", "Vodka", "Whisky", "Misc"), 
                         ordered = T)

category.price.L <- sales[c("category", "price.L")]

quantile.limit <- quantile(sales$price.L, 0.95)
data.filtered <- dplyr::filter(sales, price.L <= quantile.limit)

box-and-whisker plots

boxplot <- ggplot(data.filtered,
                aes(x = category,
                y = price.L,
                fill = category)) +
  geom_boxplot(outlier.fill = NULL) + 
  coord_flip() +
  scale_x_discrete(limits = rev(levels(category.price.L$category))) +
  scale_y_continuous(breaks = seq(from = 0, to = 40, by = 2),
                              labels = seq(from = 0, to = 40, by = 2)) +
  labs(x = "Category", y = "Price per liter", title = "Categories", subtitle = "Excluding top 5% values") +
  theme(legend.title = element_blank())

boxplot

Q2b

Prepare data for plotting

Q2b <- sales %>%
  group_by(subcategory, category) %>%
  summarize(
    total_dollar_sales = sum(sale.dollars),
    total_liter_sales = sum(sale.volume),
    .groups = 'drop'
  )  %>%
  mutate(price.L.w = total_dollar_sales / total_liter_sales)

top_subcategories <- Q2b %>%
  arrange(desc(total_dollar_sales)) %>%
  slice(1:10) %>%
  pull(subcategory)

Plot

Q2bPlot <- ggplot(Q2b, aes(x = price.L.w, y = total_liter_sales / 1000, color = category)) +
  geom_point() + 
  scale_color_brewer(palette = "Set3") + 
  geom_text_repel(
    aes(label = ifelse(subcategory %in% top_subcategories, paste(subcategory, round(price.L.w, 2)), '')),
    max.overlaps = 20
  ) + 
  theme_minimal() +
  theme(legend.position = c(0.9, 0.9), legend.background = element_blank()) + 
  scale_x_continuous(breaks = seq(floor(min(Q2b$price.L.w)), ceiling(max(Q2b$price.L.w)), by = 2)) + 
  scale_y_continuous(breaks = seq(0, max(Q2b$total_liter_sales / 1000), by = 250)) +
  labs(x = "Weighted Average Price per Liter ($)", 
       y = "Total Volume Sold (Thousands of Liters)", 
       title = "Liquor Subcategories", subtitle = "Price vs. Quantity"
       )

Q2bPlot

Q3

Prepare Data

sales$month <- month(sales$date, label = TRUE, abbr = TRUE)
sales$weekday <- wday(sales$date, label = TRUE, abbr = TRUE)

monthly.cat.sales <- aggregate(sale.dollars ~ category + month, sales, sum)
weekday.cat.sales <- aggregate(sale.dollars ~ category + weekday, sales, sum)

names(monthly.cat.sales)[3] <- "monthly.sales"

temp <- aggregate(monthly.sales ~ category, monthly.cat.sales, sum)

names(temp)[2] <- "total.month"

names(weekday.cat.sales)[3] <- "each.weekday"

temp_weekday <- aggregate(each.weekday ~ category, weekday.cat.sales, sum)

names(temp_weekday)[2] <- "total.weekday"

Q3a <- merge(monthly.cat.sales, temp, all.x = TRUE)

Q3a$share <- Q3a$monthly / Q3a$total.month

Q3b <- merge(weekday.cat.sales, temp_weekday, all.x = TRUE)

Q3b$share <- Q3b$each.weekday / Q3b$total.weekday

monthlyPlot

monthlyplot <- ggplot(Q3a, aes(x = month, y = share * 100, group = category, color = category)) +
  geom_line() +
  geom_point(aes(color = category), size = 2) + 
  scale_color_brewer(palette = "Set3") +
  theme_minimal() +
  labs(y = "", x = "Month", title = "Monthly Sales Percentage (%)")

monthlyplot

Weekday Plot

WeekdayPlot <- ggplot(Q3b, aes(x = weekday, y = share * 100, group = category, color = category)) +
  geom_line() +
  geom_point(aes(color = category), size = 2) +
  scale_color_brewer(palette = "Set3") +
  theme_minimal() +
  labs(y = "", x = "Weekday", title = "Weekday Sales Percentage (%)")

WeekdayPlot

Side-by-Side Plot

WeekdayPlot <- WeekdayPlot + theme(legend.position = "none")

grid.arrange(monthlyplot, WeekdayPlot, ncol = 2)

Q4

Prepare the data

sales_ranking <- sales %>%
  group_by(category) %>%
  summarize(
    sale_dollars = sum(sale.dollars),
    sale_volume = sum(sale.volume),
    sale_bottles = sum(sale.bottles)
  ) %>%
  pivot_longer(
    cols = starts_with("sale"),
    names_to = "sale_type",
    values_to = "sale"
  # ) %>%
  # mutate(
  #   sale_type = case_when(
  #     sale_type == "sale_dollars" ~ 1,
  #     sale_type == "sale_volume" ~ 2,
  #     sale_type == "sale_bottles" ~ 3
  #   )
  )

sales_ranking <- sales_ranking %>%
  group_by(sale_type) %>%
  mutate(rank = rank(-sale))

Plot

ggplot(sales_ranking, aes(x = sale_type, y = rank, group = category, color = category)) +
  geom_line() +
  geom_point(size = 5) +
  scale_y_reverse(breaks = 1:max(sales_ranking$rank)) + # To make rank 1 at the top
  labs(title = "Liquor Category Rankings", x = "Sale Type", y = "Rank") +
  theme_minimal() +
  theme(legend.position = "none", # Remove legend if you don't want it
        axis.text.x = element_text(angle = 0, hjust = 1)) + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  theme(axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks = element_blank()) +
  geom_text(aes(label = rank), vjust = 0.4, size = 3, colour = "white")