## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 201...
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6...
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4...
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1...
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 15...
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 15...
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the...
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Roa...
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mou...
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", ...
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climber...
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "L...
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY"...
# Sales by Year
bike_sales_y_tbl <- bike_orderlines_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year = year(order_date)) %>%
group_by(year) %>%
summarize(sales = sum(total_price)) %>%
ungroup()
# Sales by Month
bike_sales_m_tbl <- bike_orderlines_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(
year = year(order_date),
month = month(order_date, label = TRUE, abbr = TRUE)
) %>%
group_by(year, month) %>%
summarize(sales = sum(total_price)) %>%
ungroup()
bike_orderlines_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_month = floor_date(order_date, unit = "month")) %>%
group_by(year_month) %>%
summarize(sales = sum(total_price))
## # A tibble: 60 x 2
## year_month sales
## <date> <dbl>
## 1 2011-01-01 483015
## 2 2011-02-01 1162075
## 3 2011-03-01 659975
## 4 2011-04-01 1827140
## 5 2011-05-01 844170
## 6 2011-06-01 1413445
## 7 2011-07-01 1194430
## 8 2011-08-01 679790
## 9 2011-09-01 814720
## 10 2011-10-01 734920
## # ... with 50 more rows
bike_sales_y_tbl %>%
mutate(sales_lag_1 = lag(sales, n = 1)) %>%
mutate(sales_lag_1 = case_when(is.na(sales_lag_1) ~ sales, TRUE ~ sales_lag_1)) %>%
mutate(diff_1 = sales - sales_lag_1) %>%
mutate(pct_diff_1 = diff_1 / sales_lag_1) %>%
mutate(pct_diff_1_chr = scales::percent(pct_diff_1))
## # A tibble: 5 x 6
## year sales sales_lag_1 diff_1 pct_diff_1 pct_diff_1_chr
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011 11292885 11292885 0 0 0.0%
## 2 2012 12163075 11292885 870190 0.0771 7.7%
## 3 2013 16480775 12163075 4317700 0.355 35.5%
## 4 2014 13924085 16480775 -2556690 -0.155 -15.5%
## 5 2015 17171510 13924085 3247425 0.233 23.3%
# TP to TP Sales Diff Function
calculate_pct_diff <- function(data) {
data %>%
mutate(sales_lag_1 = lag(sales, n = 1)) %>%
mutate(sales_lag_1 = case_when(is.na(sales_lag_1) ~ sales, TRUE ~ sales_lag_1)) %>%
mutate(diff_1 = sales - sales_lag_1) %>%
mutate(pct_diff_1 = diff_1 / sales_lag_1) %>%
mutate(pct_diff_1_chr = scales::percent(pct_diff_1))
}
bike_sales_m_tbl %>%
calculate_pct_diff()
## # A tibble: 60 x 7
## year month sales sales_lag_1 diff_1 pct_diff_1 pct_diff_1_chr
## <dbl> <ord> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011 Jan 483015 483015 0 0 0.000%
## 2 2011 Feb 1162075 483015 679060 1.41 140.588%
## 3 2011 Mar 659975 1162075 -502100 -0.432 -43.207%
## 4 2011 Apr 1827140 659975 1167165 1.77 176.850%
## 5 2011 May 844170 1827140 -982970 -0.538 -53.798%
## 6 2011 Jun 1413445 844170 569275 0.674 67.436%
## 7 2011 Jul 1194430 1413445 -219015 -0.155 -15.495%
## 8 2011 Aug 679790 1194430 -514640 -0.431 -43.087%
## 9 2011 Sep 814720 679790 134930 0.198 19.849%
## 10 2011 Oct 734920 814720 -79800 -0.0979 -9.795%
## # ... with 50 more rows
# Sales From Base Year
bike_sales_y_tbl %>%
mutate(sales_2011 = first(sales)) %>%
mutate(diff_2011 = sales - sales_2011) %>%
mutate(pct_diff_2011 = diff_2011 / sales_2011) %>%
mutate(pct_diff_2011_chr = scales::percent(pct_diff_2011))
## # A tibble: 5 x 6
## year sales sales_2011 diff_2011 pct_diff_2011 pct_diff_2011_chr
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011 11292885 11292885 0 0 0.0%
## 2 2012 12163075 11292885 870190 0.0771 7.7%
## 3 2013 16480775 11292885 5187890 0.459 45.9%
## 4 2014 13924085 11292885 2631200 0.233 23.3%
## 5 2015 17171510 11292885 5878625 0.521 52.1%
bike_sales_m_tbl %>%
group_by(year) %>%
mutate(sales_jan = first(sales)) %>%
mutate(
diff_jan = sales - sales_jan,
pct_diff_jan = diff_jan / sales_jan,
pct_diff_jan_chr = scales::percent(pct_diff_jan)
)
## # A tibble: 60 x 7
## # Groups: year [5]
## year month sales sales_jan diff_jan pct_diff_jan pct_diff_jan_chr
## <dbl> <ord> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011 Jan 483015 483015 0 0 0.0%
## 2 2011 Feb 1162075 483015 679060 1.41 140.6%
## 3 2011 Mar 659975 483015 176960 0.366 36.6%
## 4 2011 Apr 1827140 483015 1344125 2.78 278.3%
## 5 2011 May 844170 483015 361155 0.748 74.8%
## 6 2011 Jun 1413445 483015 930430 1.93 192.6%
## 7 2011 Jul 1194430 483015 711415 1.47 147.3%
## 8 2011 Aug 679790 483015 196775 0.407 40.7%
## 9 2011 Sep 814720 483015 331705 0.687 68.7%
## 10 2011 Oct 734920 483015 251905 0.522 52.2%
## # ... with 50 more rows
bike_sales_y_tbl %>%
mutate(cumulative_sales = cumsum(sales)) %>%
mutate(cumulative_sales_pct = cumulative_sales / sum(sales)) %>%
mutate(cumulative_sales_pct_chr = cumulative_sales_pct %>% scales::percent())
## # A tibble: 5 x 5
## year sales cumulative_sales cumulative_sales_pct cumulative_sales_pct_chr
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011 11292885 11292885 0.159 16%
## 2 2012 12163075 23455960 0.330 33%
## 3 2013 16480775 39936735 0.562 56%
## 4 2014 13924085 53860820 0.758 76%
## 5 2015 17171510 71032330 1 100%
bike_sales_m_tbl %>%
group_by(year) %>%
mutate(cumulative_sales = cumsum(sales)) %>%
mutate(cumulative_sales_pct = cumulative_sales / sum(sales)) %>%
mutate(cumulative_sales_pct_chr = scales::percent(cumulative_sales_pct))
## # A tibble: 60 x 6
## # Groups: year [5]
## year month sales cumulative_sales cumulative_sales_p~ cumulative_sales_pc~
## <dbl> <ord> <dbl> <dbl> <dbl> <chr>
## 1 2011 Jan 483015 483015 0.0428 4.3%
## 2 2011 Feb 1162075 1645090 0.146 14.6%
## 3 2011 Mar 659975 2305065 0.204 20.4%
## 4 2011 Apr 1827140 4132205 0.366 36.6%
## 5 2011 May 844170 4976375 0.441 44.1%
## 6 2011 Jun 1413445 6389820 0.566 56.6%
## 7 2011 Jul 1194430 7584250 0.672 67.2%
## 8 2011 Aug 679790 8264040 0.732 73.2%
## 9 2011 Sep 814720 9078760 0.804 80.4%
## 10 2011 Oct 734920 9813680 0.869 86.9%
## # ... with 50 more rows
# Rolling Calculations
bike_sales_m_tbl %>%
mutate(roll_mean_3 = rollmean(sales, k = 3, na.pad = TRUE, align = "right", fill = NA)) %>%
mutate(roll_mean_6 = rollmean(sales, k = 6, na.pad = TRUE, align = "right", fill = NA))
## # A tibble: 60 x 5
## year month sales roll_mean_3 roll_mean_6
## <dbl> <ord> <dbl> <dbl> <dbl>
## 1 2011 Jan 483015 NA NA
## 2 2011 Feb 1162075 NA NA
## 3 2011 Mar 659975 768355 NA
## 4 2011 Apr 1827140 1216397. NA
## 5 2011 May 844170 1110428. NA
## 6 2011 Jun 1413445 1361585 1064970
## 7 2011 Jul 1194430 1150682. 1183539.
## 8 2011 Aug 679790 1095888. 1103158.
## 9 2011 Sep 814720 896313. 1128949.
## 10 2011 Oct 734920 743143. 946912.
## # ... with 50 more rows
bike_orderlines_tbl %>%
mutate(order_date = ymd(order_date)) %>%
filter(order_date %>% between(left = ymd("2012-01-01"), right = ymd("2013-12-31")))
## # A tibble: 6,311 x 13
## order_date order_id order_line quantity price total_price model category_1
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2012-01-03 323 1 1 3200 3200 Slic~ Road
## 2 2012-01-03 323 2 1 2130 2130 Fat ~ Mountain
## 3 2012-01-03 323 3 1 1620 1620 Beas~ Mountain
## 4 2012-01-03 323 4 2 1410 2820 CAAD~ Road
## 5 2012-01-03 323 5 1 3200 3200 Jeky~ Mountain
## 6 2012-01-03 324 1 1 3200 3200 Trig~ Mountain
## 7 2012-01-03 324 2 2 1950 3900 CAAD~ Road
## 8 2012-01-03 324 3 1 815 815 CAAD~ Road
## 9 2012-01-03 324 4 1 815 815 Trai~ Mountain
## 10 2012-01-03 324 5 1 2880 2880 F-Si~ Mountain
## # ... with 6,301 more rows, and 5 more variables: category_2 <chr>,
## # frame_material <chr>, bikeshop_name <chr>, city <chr>, state <chr>
bike_orderlines_tbl %>%
mutate(order_date = ymd(order_date)) %>%
filter(year(order_date) %in% c(2012, 2013))
## # A tibble: 6,311 x 13
## order_date order_id order_line quantity price total_price model category_1
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2012-01-03 323 1 1 3200 3200 Slic~ Road
## 2 2012-01-03 323 2 1 2130 2130 Fat ~ Mountain
## 3 2012-01-03 323 3 1 1620 1620 Beas~ Mountain
## 4 2012-01-03 323 4 2 1410 2820 CAAD~ Road
## 5 2012-01-03 323 5 1 3200 3200 Jeky~ Mountain
## 6 2012-01-03 324 1 1 3200 3200 Trig~ Mountain
## 7 2012-01-03 324 2 2 1950 3900 CAAD~ Road
## 8 2012-01-03 324 3 1 815 815 CAAD~ Road
## 9 2012-01-03 324 4 1 815 815 Trai~ Mountain
## 10 2012-01-03 324 5 1 2880 2880 F-Si~ Mountain
## # ... with 6,301 more rows, and 5 more variables: category_2 <chr>,
## # frame_material <chr>, bikeshop_name <chr>, city <chr>, state <chr>