rm(list=ls())
library(tidyverse) library(lubridate)
install.packages(“tidyverse”) install.packages(“lubridate”)
library(tidyquant)
library(readxl) library(writexl)
install.packages(“writexl”)
?read_excel()
bikes_tbl <- read_excel(path = “bikes.xlsx”)
bikeshops_tbl <- read_excel(“bikeshops.xlsx”)
orderlines_tbl <- read_excel(“orderlines.xlsx”)
install.packages(“dplyr”) library(dplyr)
bikes_tbl
glimpse(bikes_tbl)
bikeshops_tbl
orderlines_tbl
?left_join
orderlines_tbl
bikes_tbl
left_join(orderlines_tbl, bikes_tbl, by = c(“product.id” = “bike.id”))
bike_orderlines_joined_tbl <- orderlines_tbl %>% left_join(bikes_tbl, by = c(“product.id” = “bike.id”)) %>% left_join(bikeshops_tbl, by = c(“customer.id” = “bikeshop.id”))
bike_orderlines_joined_tbl
bike_orderlines_joined_tbl %>% glimpse()
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>%
install.packages(“separate”)
separate(description, into = c(“category.1”, “category.2”, “frame.material”), sep = " - “, remove = TRUE) %>%
# Separate location into city and state separate(location, into = c(“city”, “state”), sep = “,”, remove = FALSE) %>%
# price extended mutate(total.price = price * quantity) %>%
# Reorganize select(-X__1, -location) %>% select(-ends_with(“.id”)) %>%
bind_cols(bike_orderlines_joined_tbl %>% select(order.id)) %>%
# Reorder columns select(contains(“date”), contains(“id”), contains(“order”), quantity, price, total.price, everything()) %>%
# Renaming columns rename(order_date = order.date) %>% set_names(names(.) %>% str_replace_all(“\.”, “_“))
bike_orderlines_wrangled_tbl %>% glimpse()
sales_by_year_tbl <- bike_orderlines_wrangled_tbl %>%
# Selecting columns to focus on and adding a year column select(order_date, total_price) %>% mutate(year = year(order_date)) %>%
# Grouping by year, and summarizing sales group_by(year) %>% summarize(sales = sum(total_price)) %>% ungroup() %>%
# $ Format Text mutate(sales_text = scales::dollar(sales))
sales_by_year_tbl
sales_by_year_tbl %>%
# Setup canvas with year (x-axis) and sales (y-axis) ggplot(aes(x = year, y = sales)) +
# Geometries geom_col(fill = “#2c3e50”) + geom_label(aes(label = sales_text)) + geom_smooth(method = “lm”, se = FALSE) +
# Formatting theme_tq() + scale_y_continuous(labels = scales::dollar) + labs( title = “Revenue by Year”, subtitle = “Upward trend”, x = “”, y = “Revenue” )
sales_by_year_cat_2_tbl <- bike_orderlines_wrangled_tbl %>%
# Selecting columns and add a year select(order_date, total_price, category_2) %>% mutate(year = year(order_date)) %>%
# Groupby and Summarize year and category 2 group_by(year, category_2) %>% summarise(sales = sum(total_price)) %>% ungroup() %>%
# Format $ Text mutate(sales_text = scales::dollar(sales))
sales_by_year_cat_2_tbl
p<-sales_by_year_cat_2_tbl %>%
# Set up x, y, fill ggplot(aes(x = year, y = sales, fill = category_2)) +
# Geometries geom_col() + geom_smooth(method = “lm”, se = FALSE) +
# Facet facet_wrap(~ category_2, ncol = 4, scales = “free_y”) +
# Formatting theme_tq() + scale_fill_tq() + scale_y_continuous(labels = scales::dollar) + labs( title = “Revenue by Year and Category 2”, subtitle = “Each product category has an upward trend”, x = “”, y = “Revenue”, fill = “Product Secondary Category” )
pdf(“ggplot.pdf”) print(p) dev.off()
ggsave(“myplot.pdf”) ggsave(“myplot.png”) ggsave(“myplot1.png”, plot = p)
fs::dir_create(“data_wrangled_student”)
bike_orderlines_wrangled_tbl %>% write_xlsx(“data_wrangled_student/bike_orderlines.xlsx”)
bike_orderlines_wrangled_tbl %>% write_csv(“data_wrangled_student/bike_orderlines.csv”)
bike_orderlines_wrangled_tbl %>% write_rds(“data_wrangled_student/bike_orderlines.rds”)
install.packages(“knitr”)