Libraries
library(readxl)
library(tidyverse)
library(lubridate)
Data Import
bikes_tbl <- read_excel("./bikes.xlsx")
bikeshops_tbl <- read_excel("./bikeshops.xlsx")
orderlines_tbl <- read_excel("./orderlines.xlsx")
Data Joining and Wrangling
bike_orderlines_wrangled_tbl <- orderlines_tbl %>%
left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id")) %>%
separate(description,
into = c('category.1', 'category.2', 'frame.material'),
sep = ' - ') %>%
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = FALSE) %>%
mutate(total.price = price * quantity) %>%
select(-...1, -location) %>%
select(contains('date'), contains('id'),
contains('order'),
quantity, price, total.price,
everything()) %>%
rename(order_date = order.date) %>%
set_names(names(.) %>% str_replace_all("\\.", "_"))
# Display first few rows
head(bike_orderlines_wrangled_tbl)
## # A tibble: 6 × 15
## order_date order_id customer_id product_id order_line quantity price
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2011-01-07 00:00:00 1 2 48 1 1 6070
## 2 2011-01-07 00:00:00 1 2 52 2 1 5970
## 3 2011-01-10 00:00:00 2 10 76 1 1 2770
## 4 2011-01-10 00:00:00 2 10 52 2 1 5970
## 5 2011-01-10 00:00:00 3 6 2 1 1 10660
## 6 2011-01-10 00:00:00 3 6 50 2 1 3200
## # ℹ 8 more variables: total_price <dbl>, model <chr>, category_1 <chr>,
## # category_2 <chr>, frame_material <chr>, bikeshop_name <chr>, city <chr>,
## # state <chr>
Revenue by Category 2
# Bar plot
# Generate total sales by category 2
revenue_by_category2_tbl <- bike_orderlines_wrangled_tbl %>%
select(category_2, total_price) %>%
group_by(category_2) %>%
summarise(revenue = sum(total_price)) %>%
ungroup()
# bar plot
revenue_by_category2_tbl %>%
mutate(category_2 = category_2 %>% as_factor() %>% fct_reorder(revenue)) %>%
ggplot(aes(category_2, revenue)) +
geom_col(fill = "darkgreen") +
coord_flip()
