Run this chunk only if you need to install packages. It is set to
eval = FALSE so it will not run during rendering.
install.packages("readxl")
install.packages("tidyverse") # Only run once
install.packages("writexl")
install.packages("shiny")
install.packages("remotes")
remotes::install_github("posit-dev/publisher")
# devtools if needed
if (!requireNamespace("devtools", quietly = TRUE)) {
install.packages("devtools")
}
library(tidyverse)
library(readxl)
library(writexl)
library(shiny)
library(lubridate)
library(tidyquant)
library(scales)
library(stringr)
# other optional libs used in the original
library(packrat)
library(rsconnect)
library(knitr)
Paths are relative to the working directory. Adjust if needed.
bikes_tbl <- read_excel("./bikes.xlsx")
bikeshops_tbl <- read_excel("./bikeshops.xlsx")
orderlines_tbl <- read_excel("./orderlines.xlsx")
# Examine data
bikes_tbl
head(bikes_tbl)
# Import csv file
bike_orderlines_tbl <- read_csv("./bike_orderlines.csv")
orderlines_bikes_tbl <- left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id"))
bike_orderlines_bikeshops_joined <- left_join(orderlines_bikes_tbl, bikeshops_tbl,
by = c('customer.id' = 'bikeshop.id'))
# Or using pipes (the script uses this pattern later)
bike_orderlines_bikeshops_joined <- left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id")) %>%
left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))
Decompose description and location, create total price, clean names, and save RDS.
bike_orderlines_wrangled_tbl <- bike_orderlines_bikeshops_joined %>%
separate(description,
into = c('category.1', 'category.2', 'frame.material'),
sep = ' - ') %>%
separate(location,
into = c('city', 'state'),
sep = ', ',
remove = FALSE) %>%
mutate(total.price = price * quantity) %>%
# Reorganize columns
select(-...1, -location) %>%
# Reorder columns
select(contains('date'), contains('id'),
contains('order'),
quantity, price, total.price,
everything()) %>%
# Rename columns
rename(order_date = order.date) %>%
set_names(names(.) %>% str_replace_all("\\.", "_"))
# save the file as RDS
saveRDS(bike_orderlines_wrangled_tbl, './bike_orderlines.rds')
Small examples showing pull(), select_if(), arrange(), filter(), slice(), distinct(), mutate(), ntile(), case_when().
# pull() vs. select()
bike_orderlines_wrangled_tbl %>%
pull(total_price) %>%
mean()
## [1] 4540.548
# select_if
bike_orderlines_wrangled_tbl %>%
select_if(is.numeric)
# arrange() and desc()
bikes_tbl %>%
select(model, price) %>%
arrange(desc(price))
# filter()
bikes_tbl %>%
select(model, price) %>%
filter(price > mean(price))
bikes_tbl %>%
select(model, price) %>%
filter((price > 5000) & (price < 10000)) %>%
arrange(desc(price))
bikes_tbl %>%
select(model, price) %>%
filter(price > 6000,
model %>% str_detect("Supersix"))
# Filtering using %in%
bike_orderlines_wrangled_tbl %>%
filter(category_2 %in% c("Over Mountain", "Trail", "Endurance Road"))
# slice()
bikes_tbl %>%
arrange(desc(price)) %>%
slice((nrow(.)-4):nrow(.))
# distinct()
bike_orderlines_wrangled_tbl %>%
distinct(category_1, category_2)
# mutate()
bike_orderlines_wrangled_tbl %>%
mutate(total_price_log = log(total_price)) %>%
mutate(total_price_sqrt = total_price^0.5)
# Binning with ntile()
bike_orderlines_wrangled_tbl %>%
mutate(total_price_binned = ntile(total_price, 3))
# case_when()
bike_orderlines_wrangled_tbl %>%
mutate(total_price_binned = ntile(total_price, 3)) %>%
mutate(total_price_binned2 = case_when(
total_price > quantile(total_price, 0.75) ~ "High",
total_price > quantile(total_price, 0.25) ~ "Medium",
TRUE ~ "Low"
))
bike_orderlines_wrangled_tbl %>%
summarise(revenue = sum(total_price))
bike_orderlines_wrangled_tbl %>%
group_by(category_1) %>%
summarise(revenue = sum(total_price)) %>%
ungroup() %>%
arrange(desc(revenue))
bike_orderlines_wrangled_tbl %>%
group_by(category_1, category_2, frame_material) %>%
summarise(revenue = sum(total_price)) %>%
ungroup() %>%
arrange(desc(revenue))
Q1: What are the unique categories of products?
bike_orderlines_wrangled_tbl %>% distinct(category_1)
bike_orderlines_wrangled_tbl %>% distinct(category_2)
bike_orderlines_wrangled_tbl %>% distinct(frame_material)
Q2: Which product categories have the largest sales?
bike_orderlines_wrangled_tbl %>%
select(category_1, total_price) %>%
group_by(category_1) %>%
summarise(sales = sum(total_price)) %>%
ungroup() %>%
rename(`Primary Category` = category_1,
Sales = sales) %>%
# format dollars
mutate(Sales1 = Sales %>% scales::dollar())
# Generate total sales by year (ensure order_date is a proper date)
str(bike_orderlines_wrangled_tbl)
## tibble [15,644 × 15] (S3: tbl_df/tbl/data.frame)
## $ order_date : POSIXct[1:15644], format: "2011-01-07" "2011-01-07" ...
## $ order_id : num [1:15644] 1 1 2 2 3 3 3 3 3 4 ...
## $ customer_id : num [1:15644] 2 2 10 10 6 6 6 6 6 22 ...
## $ product_id : num [1:15644] 48 52 76 52 2 50 1 4 34 26 ...
## $ order_line : num [1:15644] 1 2 1 2 1 2 3 4 5 1 ...
## $ quantity : num [1:15644] 1 1 1 1 1 1 1 1 1 1 ...
## $ price : num [1:15644] 6070 5970 2770 5970 10660 ...
## $ total_price : num [1:15644] 6070 5970 2770 5970 10660 ...
## $ model : chr [1:15644] "Jekyll Carbon 2" "Trigger Carbon 2" "Beast of the East 1" "Trigger Carbon 2" ...
## $ category_1 : chr [1:15644] "Mountain" "Mountain" "Mountain" "Mountain" ...
## $ category_2 : chr [1:15644] "Over Mountain" "Over Mountain" "Trail" "Over Mountain" ...
## $ frame_material: chr [1:15644] "Carbon" "Carbon" "Aluminum" "Carbon" ...
## $ bikeshop_name : chr [1:15644] "Ithaca Mountain Climbers" "Ithaca Mountain Climbers" "Kansas City 29ers" "Kansas City 29ers" ...
## $ city : chr [1:15644] "Ithaca" "Ithaca" "Kansas City" "Kansas City" ...
## $ state : chr [1:15644] "NY" "NY" "KS" "KS" ...
bike_sales_y <- bike_orderlines_wrangled_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year = year(order_date)) %>%
group_by(year) %>%
summarise(sales = sum(total_price)) %>%
ungroup()
# Monthly aggregation
bike_sales_m <- bike_orderlines_wrangled_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_month = floor_date(order_date, unit = "month")) %>%
group_by(year_month) %>%
summarise(sales = sum(total_price))
The original script defines a helper to calculate percent differences; keep that as a reusable function.
calculate_pct_diff <- function(data){
data %>%
mutate(sales_lag_1 = lag(sales, n = 1)) %>%
mutate(sales_lag_1 = case_when(
is.na(sales_lag_1) ~ sales,
TRUE ~ sales_lag_1
)) %>%
mutate(diff_1 = sales - sales_lag_1) %>%
mutate(pct_diff_1 = diff_1 / sales_lag_1) %>%
mutate(pct_diff_1_chr = scales::percent(pct_diff_1))
}
calculate_pct_diff(bike_sales_m)
Example: total sales by year.
bike_sales_y %>%
ggplot(aes(x = year, y = sales, color = sales)) +
geom_point(size = 5) +
geom_line(linewidth = 2) +
geom_smooth(method = "lm", formula = 'y ~ x', se = FALSE) +
expand_limits(y = c(0, 20e6)) +
scale_colour_continuous(low = "red", high = "black",
labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) +
scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) +
labs(
title = "Revenue",
subtitle = "Sales are trending up and to the right!",
x = "year",
y = "Sales (Millions)",
color = "Rev ($M)",
caption = "Total sales from 2011 to 2015"
)
Bar plot: revenue by category_2.
revenue_by_category2_tbl <- bike_orderlines_wrangled_tbl %>%
select(category_2, total_price) %>%
group_by(category_2) %>%
summarise(revenue = sum(total_price)) %>%
ungroup()
revenue_by_category2_tbl %>%
mutate(category_2 = category_2 %>% as_factor() %>% fct_reorder(desc(revenue))) %>%
ggplot(aes(category_2, revenue)) +
geom_col(fill = "blue")+
coord_flip()
Scatter plot: order value.
order_value_tbl <- bike_orderlines_wrangled_tbl %>%
select(order_id, order_line, total_price, quantity) %>%
group_by(order_id) %>%
summarize(
total_quantity = sum(quantity),
total_price = sum(total_price)
) %>%
ungroup()
order_value_tbl %>%
ggplot(aes(x = total_quantity, y = total_price)) +
geom_point(alpha = 0.5, size = 2)
Histograms, density, boxplots and heatmaps are preserved below.
# Histogram of prices
bike_orderlines_wrangled_tbl %>%
distinct(model, price) %>%
ggplot(aes(price)) +
geom_histogram(bins = 30, fill = "blue", color = "white")
# Density by frame material
bike_orderlines_wrangled_tbl %>%
distinct(price, model, frame_material) %>%
ggplot(aes(price, fill = frame_material)) +
geom_density(alpha = 0.5)+
scale_fill_tq() +
theme_tq()
# Boxplot: unit price by category_2
unit_price_by_cat2_tbl <- bike_orderlines_wrangled_tbl %>%
select(category_2, model, price) %>%
distinct() %>%
mutate(category_2 = as_factor(category_2) %>% fct_reorder(price))
unit_price_by_cat2_tbl %>%
ggplot(aes(category_2, price)) +
geom_boxplot() +
coord_flip() +
theme_tq()
Heatmap of purchasing habits:
pct_sales_by_customer_tbl <- bike_orderlines_wrangled_tbl %>%
select(bikeshop_name, category_1, category_2, quantity) %>%
group_by(bikeshop_name, category_1, category_2) %>%
summarise(total_qty = sum(quantity)) %>%
ungroup() %>%
group_by(bikeshop_name) %>%
mutate(pct = total_qty / sum(total_qty)) %>%
ungroup() %>%
mutate(bikeshop_name = as.factor(bikeshop_name) %>% fct_rev()) %>%
mutate(bikeshop_name_num = as.numeric(bikeshop_name))
pct_sales_by_customer_tbl %>%
ggplot(aes(category_2, bikeshop_name)) +
geom_tile(aes(fill = pct)) +
geom_text(aes(label = scales::percent(pct)), size = 3)+
scale_fill_gradient(low = "white", high = "black") +
facet_wrap(~ category_1) +
labs(
title = "Heatmap of purchasing habits",
x = "Bike type (category 2)",
y = "Customer"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Examples of str_detect, str_c, str_glue, and separation.
bikes_tbl %>%
select(model) %>%
mutate(supersix = model %>% str_detect("Supersix") %>% as.numeric()) %>%
mutate(black = model %>% str_detect("Black") %>% as.numeric())
order_id <- 1
order_line <- 1
str_c("Order Line: ", order_id, ".", order_line)
## [1] "Order Line: 1.1"
str_glue("Order Line: {order_id}.{order_line}")
## Order Line: 1.1
bike_orderlines_tbl %>%
select(bikeshop_name, order_id, order_line) %>%
mutate(purchase_statement = str_glue(
"Order Line: {order_id}.{order_line} sent to Customer: {str_to_upper(bikeshop_name)}"
) %>% as.character())
This section preserves the function from your script that cleans and
decomposes model into base and tier features.
test <- bikes_tbl %>% select(model) %>%
mutate(model = case_when(
model == "CAAD Disc Ultegra" ~ "CAAD12 Disc Ultegra",
model == "Supersix Evo Hi-Mod Utegra" ~ "Supersix Evo Hi-Mod Ultegra",
model == "Syapse Carbon Tiagra" ~ "Synapse Carbon Tiagra",
TRUE ~ model)
) %>%
separate(col = model,
into = str_c("model_", 1:7),
sep = " ",
remove = FALSE,
fill = "right"
) %>%
mutate(model_base = case_when(
str_detect(str_to_lower(model_1), "supersix") ~ str_c(model_1, model_2, sep = " "),
str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep = " "),
str_detect(str_to_lower(model_1), "bad") ~ str_c(model_1, model_2, sep = " "),
str_detect(str_to_lower(model_1), "fat") ~ str_c(model_1, model_2, sep = " "),
str_detect(str_to_lower(model_1), "29") ~ str_c(model_1, model_2, sep = " "),
TRUE ~ model_1
)) %>%
mutate(model_tier = model %>% str_replace(model_base, replacement = "") %>% str_trim()) %>%
select(-matches("model_[0-9]")) %>%
mutate(black = model_tier %>% str_to_lower() %>% str_detect("black") %>% as.numeric(),
red = model_tier %>% str_to_lower() %>% str_detect("red") %>% as.numeric(),
hi_mod = model_tier %>% str_to_lower() %>% str_detect("hi_mod") %>% as.numeric(),
team = model_tier %>% str_to_lower() %>% str_detect("team") %>% as.numeric(),
ultegra = model_tier %>% str_to_lower() %>% str_detect("ultegra") %>% as.numeric(),
dura_ace = model_tier %>% str_to_lower() %>% str_detect("dura_ace") %>% as.numeric(),
disc = model_tier %>% str_to_lower() %>% str_detect("disc") %>% as.numeric())
Create the reusable function.
separate_bike_model <- function(data, append = TRUE) {
if (!append){
data <- data %>% select(model)
}
output_tbl <- data %>% select(model) %>%
mutate(model = case_when(
model == "CAAD Disc Ultegra" ~ "CAAD12 Disc Ultegra",
model == "Supersix Evo Hi-Mod Utegra" ~ "Supersix Evo Hi-Mod Ultegra",
model == "Syapse Carbon Tiagra" ~ "Synapse Carbon Tiagra",
TRUE ~ model)
) %>%
separate(col = model,
into = str_c("model_", 1:7),
sep = " ",
remove = FALSE,
fill = "right"
) %>%
mutate(model_base = case_when(
str_detect(str_to_lower(model_1), "supersix") ~ str_c(model_1, model_2, sep = " "),
str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep = " "),
str_detect(str_to_lower(model_1), "bad") ~ str_c(model_1, model_2, sep = " "),
str_detect(str_to_lower(model_1), "fat") ~ str_c(model_1, model_2, sep = " "),
str_detect(str_to_lower(model_1), "29") ~ str_c(model_1, model_2, sep = " "),
TRUE ~ model_1
)) %>%
mutate(model_tier = model %>% str_replace(model_base, replacement = "") %>% str_trim()) %>%
select(-matches("model_[0-9]")) %>%
mutate(black = model_tier %>% str_to_lower() %>% str_detect("black") %>% as.numeric(),
red = model_tier %>% str_to_lower() %>% str_detect("red") %>% as.numeric(),
hi_mod = model_tier %>% str_to_lower() %>% str_detect("hi_mod") %>% as.numeric(),
team = model_tier %>% str_to_lower() %>% str_detect("team") %>% as.numeric(),
ultegra = model_tier %>% str_to_lower() %>% str_detect("ultegra") %>% as.numeric(),
dura_ace = model_tier %>% str_to_lower() %>% str_detect("dura_ace") %>% as.numeric(),
disc = model_tier %>% str_to_lower() %>% str_detect("disc") %>% as.numeric())
return(output_tbl)
}
separate_bike_model(bikes_tbl)
The original script referenced ui but didn’t define it.
Below is a minimal UI placeholder so shinyApp(ui, server)
runs when used interactively. Replace with your real UI/server code.
server <- function(input, output, session) {
# server logic here
}
ui <- fluidPage(
titlePanel("HW12 FDB Shiny App (placeholder)"),
sidebarLayout(
sidebarPanel(
helpText("Replace with inputs")
),
mainPanel(
tableOutput("sample")
)
)
)
# shinyApp(ui = ui, server = server) # run interactively, not during render
These lines are included from the original; run them interactively if you need to install publisher or related packages.
install.packages("remotes")
remotes::install_github("posit-dev/publisher")
if (!requireNamespace("remotes", quietly = TRUE)) {
install.packages("remotes")
}
if (!requireNamespace("devtools", quietly = TRUE)) {
install.packages("devtools")
}
remotes::install_github("posit-dev/publisher")