library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages --------------------------------------------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.1.1       √ purrr   0.3.2  
## √ tibble  2.1.1       √ dplyr   0.8.0.1
## √ tidyr   0.8.3       √ stringr 1.4.0  
## √ readr   1.3.1       √ forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.5.3
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.3
library(writexl)
## Warning: package 'writexl' was built under R version 3.5.3
?read_excel()
## starting httpd help server ... done
bikes_tbl <- read_excel("bikes.xlsx")

bikeshops_tbl <- read_excel("bikeshops.xlsx")

orderlines_tbl <- read_excel("orderlines.xlsx")
## New names:
## * `` -> ...1
bikes_tbl
## # A tibble: 97 x 4
##    bike.id model                          description                price
##      <dbl> <chr>                          <chr>                      <dbl>
##  1       1 Supersix Evo Black Inc.        Road - Elite Road - Carbon 12790
##  2       2 Supersix Evo Hi-Mod Team       Road - Elite Road - Carbon 10660
##  3       3 Supersix Evo Hi-Mod Dura Ace 1 Road - Elite Road - Carbon  7990
##  4       4 Supersix Evo Hi-Mod Dura Ace 2 Road - Elite Road - Carbon  5330
##  5       5 Supersix Evo Hi-Mod Utegra     Road - Elite Road - Carbon  4260
##  6       6 Supersix Evo Red               Road - Elite Road - Carbon  3940
##  7       7 Supersix Evo Ultegra 3         Road - Elite Road - Carbon  3200
##  8       8 Supersix Evo Ultegra 4         Road - Elite Road - Carbon  2660
##  9       9 Supersix Evo 105               Road - Elite Road - Carbon  2240
## 10      10 Supersix Evo Tiagra            Road - Elite Road - Carbon  1840
## # ... with 87 more rows
glimpse(bikes_tbl)
## Observations: 97
## Variables: 4
## $ bike.id     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
## $ model       <chr> "Supersix Evo Black Inc.", "Supersix Evo Hi-Mod Te...
## $ description <chr> "Road - Elite Road - Carbon", "Road - Elite Road -...
## $ price       <dbl> 12790, 10660, 7990, 5330, 4260, 3940, 3200, 2660, ...
bikeshops_tbl
## # A tibble: 30 x 3
##    bikeshop.id bikeshop.name                location       
##          <dbl> <chr>                        <chr>          
##  1           1 Pittsburgh Mountain Machines Pittsburgh, PA 
##  2           2 Ithaca Mountain Climbers     Ithaca, NY     
##  3           3 Columbus Race Equipment      Columbus, OH   
##  4           4 Detroit Cycles               Detroit, MI    
##  5           5 Cincinnati Speed             Cincinnati, OH 
##  6           6 Louisville Race Equipment    Louisville, KY 
##  7           7 Nashville Cruisers           Nashville, TN  
##  8           8 Denver Bike Shop             Denver, CO     
##  9           9 Minneapolis Bike Shop        Minneapolis, MN
## 10          10 Kansas City 29ers            Kansas City, KS
## # ... with 20 more rows
orderlines_tbl
## # A tibble: 15,644 x 7
##    ...1  order.id order.line order.date          customer.id product.id
##    <chr>    <dbl>      <dbl> <dttm>                    <dbl>      <dbl>
##  1 1            1          1 2011-01-07 00:00:00           2         48
##  2 2            1          2 2011-01-07 00:00:00           2         52
##  3 3            2          1 2011-01-10 00:00:00          10         76
##  4 4            2          2 2011-01-10 00:00:00          10         52
##  5 5            3          1 2011-01-10 00:00:00           6          2
##  6 6            3          2 2011-01-10 00:00:00           6         50
##  7 7            3          3 2011-01-10 00:00:00           6          1
##  8 8            3          4 2011-01-10 00:00:00           6          4
##  9 9            3          5 2011-01-10 00:00:00           6         34
## 10 10           4          1 2011-01-11 00:00:00          22         26
## # ... with 15,634 more rows, and 1 more variable: quantity <dbl>
?left_join

orderlines_tbl
## # A tibble: 15,644 x 7
##    ...1  order.id order.line order.date          customer.id product.id
##    <chr>    <dbl>      <dbl> <dttm>                    <dbl>      <dbl>
##  1 1            1          1 2011-01-07 00:00:00           2         48
##  2 2            1          2 2011-01-07 00:00:00           2         52
##  3 3            2          1 2011-01-10 00:00:00          10         76
##  4 4            2          2 2011-01-10 00:00:00          10         52
##  5 5            3          1 2011-01-10 00:00:00           6          2
##  6 6            3          2 2011-01-10 00:00:00           6         50
##  7 7            3          3 2011-01-10 00:00:00           6          1
##  8 8            3          4 2011-01-10 00:00:00           6          4
##  9 9            3          5 2011-01-10 00:00:00           6         34
## 10 10           4          1 2011-01-11 00:00:00          22         26
## # ... with 15,634 more rows, and 1 more variable: quantity <dbl>
bikes_tbl
## # A tibble: 97 x 4
##    bike.id model                          description                price
##      <dbl> <chr>                          <chr>                      <dbl>
##  1       1 Supersix Evo Black Inc.        Road - Elite Road - Carbon 12790
##  2       2 Supersix Evo Hi-Mod Team       Road - Elite Road - Carbon 10660
##  3       3 Supersix Evo Hi-Mod Dura Ace 1 Road - Elite Road - Carbon  7990
##  4       4 Supersix Evo Hi-Mod Dura Ace 2 Road - Elite Road - Carbon  5330
##  5       5 Supersix Evo Hi-Mod Utegra     Road - Elite Road - Carbon  4260
##  6       6 Supersix Evo Red               Road - Elite Road - Carbon  3940
##  7       7 Supersix Evo Ultegra 3         Road - Elite Road - Carbon  3200
##  8       8 Supersix Evo Ultegra 4         Road - Elite Road - Carbon  2660
##  9       9 Supersix Evo 105               Road - Elite Road - Carbon  2240
## 10      10 Supersix Evo Tiagra            Road - Elite Road - Carbon  1840
## # ... with 87 more rows
left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id"))
## # A tibble: 15,644 x 10
##    ...1  order.id order.line order.date          customer.id product.id
##    <chr>    <dbl>      <dbl> <dttm>                    <dbl>      <dbl>
##  1 1            1          1 2011-01-07 00:00:00           2         48
##  2 2            1          2 2011-01-07 00:00:00           2         52
##  3 3            2          1 2011-01-10 00:00:00          10         76
##  4 4            2          2 2011-01-10 00:00:00          10         52
##  5 5            3          1 2011-01-10 00:00:00           6          2
##  6 6            3          2 2011-01-10 00:00:00           6         50
##  7 7            3          3 2011-01-10 00:00:00           6          1
##  8 8            3          4 2011-01-10 00:00:00           6          4
##  9 9            3          5 2011-01-10 00:00:00           6         34
## 10 10           4          1 2011-01-11 00:00:00          22         26
## # ... with 15,634 more rows, and 4 more variables: quantity <dbl>,
## #   model <chr>, description <chr>, price <dbl>
bike_orderlines_joined_tbl <- orderlines_tbl %>%
    left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>%
    left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))

bike_orderlines_joined_tbl
## # A tibble: 15,644 x 12
##    ...1  order.id order.line order.date          customer.id product.id
##    <chr>    <dbl>      <dbl> <dttm>                    <dbl>      <dbl>
##  1 1            1          1 2011-01-07 00:00:00           2         48
##  2 2            1          2 2011-01-07 00:00:00           2         52
##  3 3            2          1 2011-01-10 00:00:00          10         76
##  4 4            2          2 2011-01-10 00:00:00          10         52
##  5 5            3          1 2011-01-10 00:00:00           6          2
##  6 6            3          2 2011-01-10 00:00:00           6         50
##  7 7            3          3 2011-01-10 00:00:00           6          1
##  8 8            3          4 2011-01-10 00:00:00           6          4
##  9 9            3          5 2011-01-10 00:00:00           6         34
## 10 10           4          1 2011-01-11 00:00:00          22         26
## # ... with 15,634 more rows, and 6 more variables: quantity <dbl>,
## #   model <chr>, description <chr>, price <dbl>, bikeshop.name <chr>,
## #   location <chr>
bike_orderlines_joined_tbl %>% glimpse()
## Observations: 15,644
## Variables: 12
## $ ...1          <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10...
## $ order.id      <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, ...
## $ order.line    <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, ...
## $ order.date    <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10,...
## $ customer.id   <dbl> 2, 2, 10, 10, 6, 6, 6, 6, 6, 22, 8, 8, 8, 8, 16,...
## $ product.id    <dbl> 48, 52, 76, 52, 2, 50, 1, 4, 34, 26, 96, 66, 35,...
## $ quantity      <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, ...
## $ model         <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of...
## $ description   <chr> "Mountain - Over Mountain - Carbon", "Mountain -...
## $ price         <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330...
## $ bikeshop.name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Cli...
## $ location      <chr> "Ithaca, NY", "Ithaca, NY", "Kansas City, KS", "...
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>%
    
    # Separate description into category.1, category.2, and frame.material
    separate(description,
             into = c("category.1", "category.2", "frame.material"),
             sep = " - ",
             remove = TRUE) %>%
    
    # Separate location into city and state
    separate(location,
             into = c("city", "state"),
             sep  = ", ",
             remove = FALSE) %>%
    
    # price extended
    mutate(total.price = price * quantity) %>%
    
    # Reorganize
    select(-...1, -location) %>%
    select(-ends_with(".id")) %>%
    
    bind_cols(bike_orderlines_joined_tbl %>% select(order.id)) %>%
    
    # Reorder columns
    select(contains("date"), contains("id"), contains("order"),
           quantity, price, total.price,
           everything()) %>%
    
    # Renaming columns
    rename(order_date = order.date) %>%
    set_names(names(.) %>% str_replace_all("\\.", "_")) 

bike_orderlines_wrangled_tbl %>% glimpse()
## Observations: 15,644
## Variables: 13
## $ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10...
## $ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6,...
## $ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2,...
## $ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,...
## $ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 533...
## $ total_price    <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 533...
## $ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast o...
## $ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain",...
## $ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Ove...
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carb...
## $ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Cl...
## $ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City...
## $ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY",...