```{# 1. Install packages install.packages(“readxl”) install.packages(“tidyverse”) # Only run once library(tidyverse)
library(readxl) library(writexl)
bikes_tbl <- read_excel(“./bikes.xlsx”) # fast key: alt+- bikeshops_tbl <- read_excel(“./bikeshops.xlsx”) orderlines_tbl <- read_excel(“./orderlines.xlsx”)
bikes_tbl head(bikes_tbl)
bike_orderlines_tbl <- read_csv(“./bike_orderlines.csv”)
orderlines_bikes_tbl <- left_join(orderlines_tbl, bikes_tbl, by = c(“product.id” = “bike.id”))
bike_orderlines_bikeshops_joined <- left_join(orderlines_bikes_tbl, bikeshops_tbl, by = c(‘customer.id’ = ‘bikeshop.id’))
bike_orderlines_bikeshops_joined <- left_join(orderlines_tbl, bikes_tbl, by = c(“product.id” = “bike.id”)) %>% left_join(bikeshops_tbl, by = c(“customer.id” = “bikeshop.id”))
bike_orderlines_wrangled_tbl <- bike_orderlines_bikeshops_joined
%>% separate(description, into = c(‘category.1’, ‘category.2’,
‘frame.material’), sep = ’ - ‘) %>% separate(location, into =
c(’city’, ‘state’), sep = ‘,’, remove = FALSE) %>% # create
calculated columns mutate(total.price = price * quantity) %>% #
Reorganize columns
select(-…1, -location) %>% # Reorder columns
select(contains(‘date’), contains(‘id’), contains(‘order’), quantity,
price, total.price, everything()) %>% # Rename columns
rename(order_date = order.date) %>% set_names(names(.) %>%
str_replace_all(“\.”, “_“)) # save the file as RDS
saveRDS(bike_orderlines_wrangled_tbl, ‘./bike_orderlines.rds’)
bike_orderlines_wrangled_tbl %>% # select(total_price) pull(total_price) %>% mean() # select_if bike_orderlines_wrangled_tbl %>% # select_if(is.character) select_if(is.numeric)
bikes_tbl %>% select(model, price) %>% arrange(desc(price))
bikes_tbl %>% select(model, price) %>% filter(price > mean(price))
bikes_tbl %>% select(model, price) %>% filter((price > 5000)
& (price < 10000)) %>%
arrange(desc(price))
bikes_tbl %>% select(model, price) %>% filter(price > 6000, model %>% str_detect(“Supersix”))
bike_orderlines_wrangled_tbl %>% filter(category_2 %in% c(“Over Mountain”, “Trail”, “Endurance Road”)) %>% View()
bikes_tbl %>% arrange(desc(price)) %>% # slice(1:5) slice((nrow(.)-4):nrow(.))
bike_orderlines_wrangled_tbl %>% distinct(category_1, category_2) %>% View()
bike_orderlines_wrangled_tbl %>% mutate(total_price_log = log(total_price)) %>% mutate(total_price_sqrt = total_price^0.5) %>% View()
bike_orderlines_wrangled_tbl %>% mutate(total_price_binned = ntile(total_price, 3)) %>% View()
bike_orderlines_wrangled_tbl %>% mutate(total_price_binned = ntile(total_price, 3)) %>% mutate(total_price_binned2 = case_when( total_price > quantile(total_price, 0.75) ~ “High”, total_price > quantile(total_price, 0.25) ~ “Medium”, TRUE ~ “Low” )) %>% View()
bike_orderlines_wrangled_tbl %>% summarise(revenue = sum(total_price))
bike_orderlines_wrangled_tbl %>% group_by(category_1) %>% summarise(revenue = sum(total_price)) %>% ungroup() %>% arrange(desc(revenue))
bike_orderlines_wrangled_tbl %>%
group_by(category_1, category_2, frame_material) %>%
summarise(revenue = sum(total_price)) %>% ungroup() %>%
arrange(desc(revenue))
bike_orderlines_wrangled_tbl %>% distinct(category_1) bike_orderlines_wrangled_tbl %>% distinct(category_2) bike_orderlines_wrangled_tbl %>% distinct(frame_material)
bike_orderlines_wrangled_tbl %>% select(category_1, total_price)
%>% group_by(category_1) %>% summarise(sales = sum(total_price))
%>% ungroup() %>% rename(Primary Category =
category_1, Sales = sales) %>% # format dollars mutate(Sales1 = Sales
%>% scales::dollar())