## Rows: 15,644
## Columns: 13
## $ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 201...
## $ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6...
## $ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4...
## $ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1...
## $ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 15...
## $ total_price    <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 15...
## $ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the...
## $ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Roa...
## $ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mou...
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", ...
## $ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climber...
## $ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "L...
## $ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY"...
# Sales by Year

bike_sales_y_tbl <- bike_orderlines_tbl %>% 
    select(order_date, total_price) %>% 
    mutate(order_date = ymd(order_date)) %>% 
    mutate(year = year(order_date)) %>% 
    group_by(year) %>% 
    summarize(sales = sum(total_price)) %>% 
    ungroup()
# Sales by Month

bike_sales_m_tbl <- bike_orderlines_tbl %>% 
    select(order_date, total_price) %>% 
    mutate(order_date = ymd(order_date)) %>% 
    mutate(
        year  = year(order_date),
        month = month(order_date, label = TRUE, abbr = TRUE)
    ) %>% 
    group_by(year, month) %>% 
    summarize(sales = sum(total_price)) %>% 
    ungroup()
bike_orderlines_tbl %>% 
    select(order_date, total_price) %>% 
    mutate(order_date = ymd(order_date)) %>% 
    mutate(year_month = floor_date(order_date, unit = "month")) %>% 
    group_by(year_month) %>% 
    summarize(sales = sum(total_price))
## # A tibble: 60 x 2
##    year_month   sales
##    <date>       <dbl>
##  1 2011-01-01  483015
##  2 2011-02-01 1162075
##  3 2011-03-01  659975
##  4 2011-04-01 1827140
##  5 2011-05-01  844170
##  6 2011-06-01 1413445
##  7 2011-07-01 1194430
##  8 2011-08-01  679790
##  9 2011-09-01  814720
## 10 2011-10-01  734920
## # ... with 50 more rows
bike_sales_y_tbl %>% 
    mutate(sales_lag_1 = lag(sales, n = 1)) %>% 
    mutate(sales_lag_1 = case_when(is.na(sales_lag_1) ~ sales, TRUE ~ sales_lag_1)) %>% 
    mutate(diff_1 = sales - sales_lag_1) %>% 
    mutate(pct_diff_1 = diff_1 / sales_lag_1) %>%
    mutate(pct_diff_1_chr = scales::percent(pct_diff_1))
## # A tibble: 5 x 6
##    year    sales sales_lag_1   diff_1 pct_diff_1 pct_diff_1_chr
##   <dbl>    <dbl>       <dbl>    <dbl>      <dbl> <chr>         
## 1  2011 11292885    11292885        0     0      0.0%          
## 2  2012 12163075    11292885   870190     0.0771 7.7%          
## 3  2013 16480775    12163075  4317700     0.355  35.5%         
## 4  2014 13924085    16480775 -2556690    -0.155  -15.5%        
## 5  2015 17171510    13924085  3247425     0.233  23.3%
# TP to TP Sales Diff Function

calculate_pct_diff <- function(data) {
    data %>% 
    mutate(sales_lag_1 = lag(sales, n = 1)) %>% 
    mutate(sales_lag_1 = case_when(is.na(sales_lag_1) ~ sales, TRUE ~ sales_lag_1)) %>% 
    mutate(diff_1 = sales - sales_lag_1) %>% 
    mutate(pct_diff_1 = diff_1 / sales_lag_1) %>%
    mutate(pct_diff_1_chr = scales::percent(pct_diff_1))
    
}

bike_sales_m_tbl %>% 
    calculate_pct_diff()
## # A tibble: 60 x 7
##     year month   sales sales_lag_1  diff_1 pct_diff_1 pct_diff_1_chr
##    <dbl> <ord>   <dbl>       <dbl>   <dbl>      <dbl> <chr>         
##  1  2011 Jan    483015      483015       0     0      0.000%        
##  2  2011 Feb   1162075      483015  679060     1.41   140.588%      
##  3  2011 Mar    659975     1162075 -502100    -0.432  -43.207%      
##  4  2011 Apr   1827140      659975 1167165     1.77   176.850%      
##  5  2011 May    844170     1827140 -982970    -0.538  -53.798%      
##  6  2011 Jun   1413445      844170  569275     0.674  67.436%       
##  7  2011 Jul   1194430     1413445 -219015    -0.155  -15.495%      
##  8  2011 Aug    679790     1194430 -514640    -0.431  -43.087%      
##  9  2011 Sep    814720      679790  134930     0.198  19.849%       
## 10  2011 Oct    734920      814720  -79800    -0.0979 -9.795%       
## # ... with 50 more rows
# Sales From Base Year

bike_sales_y_tbl %>% 
    mutate(sales_2011 = first(sales)) %>%
    mutate(diff_2011 = sales - sales_2011) %>% 
    mutate(pct_diff_2011 = diff_2011 / sales_2011) %>% 
    mutate(pct_diff_2011_chr = scales::percent(pct_diff_2011))
## # A tibble: 5 x 6
##    year    sales sales_2011 diff_2011 pct_diff_2011 pct_diff_2011_chr
##   <dbl>    <dbl>      <dbl>     <dbl>         <dbl> <chr>            
## 1  2011 11292885   11292885         0        0      0.0%             
## 2  2012 12163075   11292885    870190        0.0771 7.7%             
## 3  2013 16480775   11292885   5187890        0.459  45.9%            
## 4  2014 13924085   11292885   2631200        0.233  23.3%            
## 5  2015 17171510   11292885   5878625        0.521  52.1%
bike_sales_m_tbl %>%
    group_by(year) %>%
    mutate(sales_jan = first(sales)) %>% 
    mutate(
        diff_jan         = sales - sales_jan,
        pct_diff_jan     = diff_jan / sales_jan,
        pct_diff_jan_chr = scales::percent(pct_diff_jan)
    ) 
## # A tibble: 60 x 7
## # Groups:   year [5]
##     year month   sales sales_jan diff_jan pct_diff_jan pct_diff_jan_chr
##    <dbl> <ord>   <dbl>     <dbl>    <dbl>        <dbl> <chr>           
##  1  2011 Jan    483015    483015        0        0     0.0%            
##  2  2011 Feb   1162075    483015   679060        1.41  140.6%          
##  3  2011 Mar    659975    483015   176960        0.366 36.6%           
##  4  2011 Apr   1827140    483015  1344125        2.78  278.3%          
##  5  2011 May    844170    483015   361155        0.748 74.8%           
##  6  2011 Jun   1413445    483015   930430        1.93  192.6%          
##  7  2011 Jul   1194430    483015   711415        1.47  147.3%          
##  8  2011 Aug    679790    483015   196775        0.407 40.7%           
##  9  2011 Sep    814720    483015   331705        0.687 68.7%           
## 10  2011 Oct    734920    483015   251905        0.522 52.2%           
## # ... with 50 more rows
bike_sales_y_tbl %>%
    mutate(cumulative_sales = cumsum(sales)) %>%
    mutate(cumulative_sales_pct = cumulative_sales / sum(sales)) %>%
    mutate(cumulative_sales_pct_chr = cumulative_sales_pct %>% scales::percent())
## # A tibble: 5 x 5
##    year    sales cumulative_sales cumulative_sales_pct cumulative_sales_pct_chr
##   <dbl>    <dbl>            <dbl>                <dbl> <chr>                   
## 1  2011 11292885         11292885                0.159 16%                     
## 2  2012 12163075         23455960                0.330 33%                     
## 3  2013 16480775         39936735                0.562 56%                     
## 4  2014 13924085         53860820                0.758 76%                     
## 5  2015 17171510         71032330                1     100%
bike_sales_m_tbl %>%
    group_by(year) %>%
    mutate(cumulative_sales = cumsum(sales)) %>%
    mutate(cumulative_sales_pct = cumulative_sales / sum(sales)) %>%
    mutate(cumulative_sales_pct_chr = scales::percent(cumulative_sales_pct))
## # A tibble: 60 x 6
## # Groups:   year [5]
##     year month   sales cumulative_sales cumulative_sales_p~ cumulative_sales_pc~
##    <dbl> <ord>   <dbl>            <dbl>               <dbl> <chr>               
##  1  2011 Jan    483015           483015              0.0428 4.3%                
##  2  2011 Feb   1162075          1645090              0.146  14.6%               
##  3  2011 Mar    659975          2305065              0.204  20.4%               
##  4  2011 Apr   1827140          4132205              0.366  36.6%               
##  5  2011 May    844170          4976375              0.441  44.1%               
##  6  2011 Jun   1413445          6389820              0.566  56.6%               
##  7  2011 Jul   1194430          7584250              0.672  67.2%               
##  8  2011 Aug    679790          8264040              0.732  73.2%               
##  9  2011 Sep    814720          9078760              0.804  80.4%               
## 10  2011 Oct    734920          9813680              0.869  86.9%               
## # ... with 50 more rows
# Rolling Calculations

bike_sales_m_tbl %>%
    
    mutate(roll_mean_3 = rollmean(sales, k = 3, na.pad = TRUE, align = "right", fill = NA)) %>%
    
    mutate(roll_mean_6 = rollmean(sales, k = 6, na.pad = TRUE, align = "right", fill = NA))
## # A tibble: 60 x 5
##     year month   sales roll_mean_3 roll_mean_6
##    <dbl> <ord>   <dbl>       <dbl>       <dbl>
##  1  2011 Jan    483015         NA          NA 
##  2  2011 Feb   1162075         NA          NA 
##  3  2011 Mar    659975     768355          NA 
##  4  2011 Apr   1827140    1216397.         NA 
##  5  2011 May    844170    1110428.         NA 
##  6  2011 Jun   1413445    1361585     1064970 
##  7  2011 Jul   1194430    1150682.    1183539.
##  8  2011 Aug    679790    1095888.    1103158.
##  9  2011 Sep    814720     896313.    1128949.
## 10  2011 Oct    734920     743143.     946912.
## # ... with 50 more rows
bike_orderlines_tbl %>%
    
    mutate(order_date = ymd(order_date)) %>%
    
    filter(order_date %>% between(left = ymd("2012-01-01"), right = ymd("2013-12-31"))) 
## # A tibble: 6,311 x 13
##    order_date order_id order_line quantity price total_price model category_1
##    <date>        <dbl>      <dbl>    <dbl> <dbl>       <dbl> <chr> <chr>     
##  1 2012-01-03      323          1        1  3200        3200 Slic~ Road      
##  2 2012-01-03      323          2        1  2130        2130 Fat ~ Mountain  
##  3 2012-01-03      323          3        1  1620        1620 Beas~ Mountain  
##  4 2012-01-03      323          4        2  1410        2820 CAAD~ Road      
##  5 2012-01-03      323          5        1  3200        3200 Jeky~ Mountain  
##  6 2012-01-03      324          1        1  3200        3200 Trig~ Mountain  
##  7 2012-01-03      324          2        2  1950        3900 CAAD~ Road      
##  8 2012-01-03      324          3        1   815         815 CAAD~ Road      
##  9 2012-01-03      324          4        1   815         815 Trai~ Mountain  
## 10 2012-01-03      324          5        1  2880        2880 F-Si~ Mountain  
## # ... with 6,301 more rows, and 5 more variables: category_2 <chr>,
## #   frame_material <chr>, bikeshop_name <chr>, city <chr>, state <chr>
bike_orderlines_tbl %>%
    
    mutate(order_date = ymd(order_date)) %>%
    
    filter(year(order_date) %in% c(2012, 2013))
## # A tibble: 6,311 x 13
##    order_date order_id order_line quantity price total_price model category_1
##    <date>        <dbl>      <dbl>    <dbl> <dbl>       <dbl> <chr> <chr>     
##  1 2012-01-03      323          1        1  3200        3200 Slic~ Road      
##  2 2012-01-03      323          2        1  2130        2130 Fat ~ Mountain  
##  3 2012-01-03      323          3        1  1620        1620 Beas~ Mountain  
##  4 2012-01-03      323          4        2  1410        2820 CAAD~ Road      
##  5 2012-01-03      323          5        1  3200        3200 Jeky~ Mountain  
##  6 2012-01-03      324          1        1  3200        3200 Trig~ Mountain  
##  7 2012-01-03      324          2        2  1950        3900 CAAD~ Road      
##  8 2012-01-03      324          3        1   815         815 CAAD~ Road      
##  9 2012-01-03      324          4        1   815         815 Trai~ Mountain  
## 10 2012-01-03      324          5        1  2880        2880 F-Si~ Mountain  
## # ... with 6,301 more rows, and 5 more variables: category_2 <chr>,
## #   frame_material <chr>, bikeshop_name <chr>, city <chr>, state <chr>