1

?read_excel()
## starting httpd help server ... done
bikes_tbl <- read_excel("bikes.xlsx")

bikeshops_tbl <- read_excel("bikeshops.xlsx")

orderlines_tbl <- read_excel("orderlines.xlsx")
## New names:
## * `` -> `..1`
bikes_tbl
## # A tibble: 97 x 4
##    bike.id model                          description                price
##      <dbl> <chr>                          <chr>                      <dbl>
##  1       1 Supersix Evo Black Inc.        Road - Elite Road - Carbon 12790
##  2       2 Supersix Evo Hi-Mod Team       Road - Elite Road - Carbon 10660
##  3       3 Supersix Evo Hi-Mod Dura Ace 1 Road - Elite Road - Carbon  7990
##  4       4 Supersix Evo Hi-Mod Dura Ace 2 Road - Elite Road - Carbon  5330
##  5       5 Supersix Evo Hi-Mod Utegra     Road - Elite Road - Carbon  4260
##  6       6 Supersix Evo Red               Road - Elite Road - Carbon  3940
##  7       7 Supersix Evo Ultegra 3         Road - Elite Road - Carbon  3200
##  8       8 Supersix Evo Ultegra 4         Road - Elite Road - Carbon  2660
##  9       9 Supersix Evo 105               Road - Elite Road - Carbon  2240
## 10      10 Supersix Evo Tiagra            Road - Elite Road - Carbon  1840
## # ... with 87 more rows
glimpse(bikes_tbl)
## Observations: 97
## Variables: 4
## $ bike.id     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
## $ model       <chr> "Supersix Evo Black Inc.", "Supersix Evo Hi-Mod Te...
## $ description <chr> "Road - Elite Road - Carbon", "Road - Elite Road -...
## $ price       <dbl> 12790, 10660, 7990, 5330, 4260, 3940, 3200, 2660, ...
bikeshops_tbl
## # A tibble: 30 x 3
##    bikeshop.id bikeshop.name                location       
##          <dbl> <chr>                        <chr>          
##  1           1 Pittsburgh Mountain Machines Pittsburgh, PA 
##  2           2 Ithaca Mountain Climbers     Ithaca, NY     
##  3           3 Columbus Race Equipment      Columbus, OH   
##  4           4 Detroit Cycles               Detroit, MI    
##  5           5 Cincinnati Speed             Cincinnati, OH 
##  6           6 Louisville Race Equipment    Louisville, KY 
##  7           7 Nashville Cruisers           Nashville, TN  
##  8           8 Denver Bike Shop             Denver, CO     
##  9           9 Minneapolis Bike Shop        Minneapolis, MN
## 10          10 Kansas City 29ers            Kansas City, KS
## # ... with 20 more rows
orderlines_tbl
## # A tibble: 15,644 x 7
##    ..1   order.id order.line order.date          customer.id product.id
##    <chr>    <dbl>      <dbl> <dttm>                    <dbl>      <dbl>
##  1 1            1          1 2011-01-07 00:00:00           2         48
##  2 2            1          2 2011-01-07 00:00:00           2         52
##  3 3            2          1 2011-01-10 00:00:00          10         76
##  4 4            2          2 2011-01-10 00:00:00          10         52
##  5 5            3          1 2011-01-10 00:00:00           6          2
##  6 6            3          2 2011-01-10 00:00:00           6         50
##  7 7            3          3 2011-01-10 00:00:00           6          1
##  8 8            3          4 2011-01-10 00:00:00           6          4
##  9 9            3          5 2011-01-10 00:00:00           6         34
## 10 10           4          1 2011-01-11 00:00:00          22         26
## # ... with 15,634 more rows, and 1 more variable: quantity <dbl>
?left_join

orderlines_tbl
## # A tibble: 15,644 x 7
##    ..1   order.id order.line order.date          customer.id product.id
##    <chr>    <dbl>      <dbl> <dttm>                    <dbl>      <dbl>
##  1 1            1          1 2011-01-07 00:00:00           2         48
##  2 2            1          2 2011-01-07 00:00:00           2         52
##  3 3            2          1 2011-01-10 00:00:00          10         76
##  4 4            2          2 2011-01-10 00:00:00          10         52
##  5 5            3          1 2011-01-10 00:00:00           6          2
##  6 6            3          2 2011-01-10 00:00:00           6         50
##  7 7            3          3 2011-01-10 00:00:00           6          1
##  8 8            3          4 2011-01-10 00:00:00           6          4
##  9 9            3          5 2011-01-10 00:00:00           6         34
## 10 10           4          1 2011-01-11 00:00:00          22         26
## # ... with 15,634 more rows, and 1 more variable: quantity <dbl>
bikes_tbl
## # A tibble: 97 x 4
##    bike.id model                          description                price
##      <dbl> <chr>                          <chr>                      <dbl>
##  1       1 Supersix Evo Black Inc.        Road - Elite Road - Carbon 12790
##  2       2 Supersix Evo Hi-Mod Team       Road - Elite Road - Carbon 10660
##  3       3 Supersix Evo Hi-Mod Dura Ace 1 Road - Elite Road - Carbon  7990
##  4       4 Supersix Evo Hi-Mod Dura Ace 2 Road - Elite Road - Carbon  5330
##  5       5 Supersix Evo Hi-Mod Utegra     Road - Elite Road - Carbon  4260
##  6       6 Supersix Evo Red               Road - Elite Road - Carbon  3940
##  7       7 Supersix Evo Ultegra 3         Road - Elite Road - Carbon  3200
##  8       8 Supersix Evo Ultegra 4         Road - Elite Road - Carbon  2660
##  9       9 Supersix Evo 105               Road - Elite Road - Carbon  2240
## 10      10 Supersix Evo Tiagra            Road - Elite Road - Carbon  1840
## # ... with 87 more rows
left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id"))
## # A tibble: 15,644 x 10
##    ..1   order.id order.line order.date          customer.id product.id
##    <chr>    <dbl>      <dbl> <dttm>                    <dbl>      <dbl>
##  1 1            1          1 2011-01-07 00:00:00           2         48
##  2 2            1          2 2011-01-07 00:00:00           2         52
##  3 3            2          1 2011-01-10 00:00:00          10         76
##  4 4            2          2 2011-01-10 00:00:00          10         52
##  5 5            3          1 2011-01-10 00:00:00           6          2
##  6 6            3          2 2011-01-10 00:00:00           6         50
##  7 7            3          3 2011-01-10 00:00:00           6          1
##  8 8            3          4 2011-01-10 00:00:00           6          4
##  9 9            3          5 2011-01-10 00:00:00           6         34
## 10 10           4          1 2011-01-11 00:00:00          22         26
## # ... with 15,634 more rows, and 4 more variables: quantity <dbl>,
## #   model <chr>, description <chr>, price <dbl>
bike_orderlines_joined_tbl <- orderlines_tbl %>%
    left_join(bikes_tbl, by = c("product.id" = "bike.id")) %>%
    left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))

bike_orderlines_joined_tbl
## # A tibble: 15,644 x 12
##    ..1   order.id order.line order.date          customer.id product.id
##    <chr>    <dbl>      <dbl> <dttm>                    <dbl>      <dbl>
##  1 1            1          1 2011-01-07 00:00:00           2         48
##  2 2            1          2 2011-01-07 00:00:00           2         52
##  3 3            2          1 2011-01-10 00:00:00          10         76
##  4 4            2          2 2011-01-10 00:00:00          10         52
##  5 5            3          1 2011-01-10 00:00:00           6          2
##  6 6            3          2 2011-01-10 00:00:00           6         50
##  7 7            3          3 2011-01-10 00:00:00           6          1
##  8 8            3          4 2011-01-10 00:00:00           6          4
##  9 9            3          5 2011-01-10 00:00:00           6         34
## 10 10           4          1 2011-01-11 00:00:00          22         26
## # ... with 15,634 more rows, and 6 more variables: quantity <dbl>,
## #   model <chr>, description <chr>, price <dbl>, bikeshop.name <chr>,
## #   location <chr>
bike_orderlines_joined_tbl %>% glimpse()
## Observations: 15,644
## Variables: 12
## $ `..1`         <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10...
## $ order.id      <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, ...
## $ order.line    <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, ...
## $ order.date    <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10,...
## $ customer.id   <dbl> 2, 2, 10, 10, 6, 6, 6, 6, 6, 22, 8, 8, 8, 8, 16,...
## $ product.id    <dbl> 48, 52, 76, 52, 2, 50, 1, 4, 34, 26, 96, 66, 35,...
## $ quantity      <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, ...
## $ model         <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of...
## $ description   <chr> "Mountain - Over Mountain - Carbon", "Mountain -...
## $ price         <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330...
## $ bikeshop.name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Cli...
## $ location      <chr> "Ithaca, NY", "Ithaca, NY", "Kansas City, KS", "...
bike_orderlines_wrangled_tbl <- bike_orderlines_joined_tbl %>%
    
    # Separate description into category.1, category.2, and frame.material
    separate(description,
             into = c("category.1", "category.2", "frame.material"),
             sep = " - ",
             remove = TRUE) %>%
    
    # Separate location into city and state
    separate(location,
             into = c("city", "state"),
             sep  = ", ",
             remove = FALSE) %>%
    
    # price extended 
   mutate(total.price = price * quantity) %>%
    
    # Reorganize
    select(-1, -location) %>%
    select(-ends_with(".id")) %>%
    
    bind_cols(bike_orderlines_joined_tbl %>% select(order.id)) %>%
    
    # Reorder columns
    select(contains("date"), contains("id"), contains("order"),
           quantity, price, total.price,
           everything()) %>%
    
    # Renaming columns
    rename(order_date = order.date) %>%
    set_names(names(.) %>% str_replace_all("\\.", "_")) 

bike_orderlines_wrangled_tbl %>% glimpse()
## Observations: 15,644
## Variables: 13
## $ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10...
## $ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6,...
## $ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2,...
## $ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,...
## $ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 533...
## $ total_price    <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 533...
## $ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast o...
## $ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain",...
## $ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Ove...
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carb...
## $ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Cl...
## $ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City...
## $ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY",...
sales_by_year_tbl <- bike_orderlines_wrangled_tbl %>%
    
    # Selecting columns to focus on and adding a year column
    select(order_date, total_price) %>%
    mutate(year = year(order_date)) %>%
    
    # Grouping by year, and summarizing sales
    group_by(year) %>%
    summarize(sales = sum(total_price)) %>%
    ungroup() %>%
    
    # $ Format Text
    mutate(sales_text = scales::dollar(sales))
    
sales_by_year_tbl
## # A tibble: 5 x 3
##    year    sales sales_text 
##   <dbl>    <dbl> <chr>      
## 1  2011 11292885 $11,292,885
## 2  2012 12163075 $12,163,075
## 3  2013 16480775 $16,480,775
## 4  2014 13924085 $13,924,085
## 5  2015 17171510 $17,171,510
sales_by_year_tbl %>%
    
    # Setup canvas with year (x-axis) and sales (y-axis)
    ggplot(aes(x = year, y = sales)) +
    
    # Geometries
    geom_col(fill = "#2c3e50") +
    geom_label(aes(label = sales_text)) +
    geom_smooth(method = "lm", se = FALSE) +
    
    # Formatting
    #theme_tq() +
    scale_y_continuous(labels = scales::dollar) +
    labs(
        title = "Revenue by Year",
        subtitle = "Upward trend",
        x = "",
        y = "Revenue"
    )

sales_by_year_cat_2_tbl <- bike_orderlines_wrangled_tbl %>%
    
    # Selecting columns and add a year
    select(order_date, total_price, category_2) %>%
    mutate(year = year(order_date)) %>%
    
    # Groupby and Summarize year and category 2
    group_by(year, category_2) %>%
    summarise(sales = sum(total_price)) %>%
    ungroup() %>%
    
    # Format $ Text
    mutate(sales_text = scales::dollar(sales))


sales_by_year_cat_2_tbl
## # A tibble: 45 x 4
##     year category_2           sales sales_text
##    <dbl> <chr>                <dbl> <chr>     
##  1  2011 Cross Country Race 2917250 $2,917,250
##  2  2011 Cyclocross          378980 $378,980  
##  3  2011 Elite Road         2493315 $2,493,315
##  4  2011 Endurance Road     1606230 $1,606,230
##  5  2011 Fat Bike            221600 $221,600  
##  6  2011 Over Mountain      1328510 $1,328,510
##  7  2011 Sport               309290 $309,290  
##  8  2011 Trail              1446560 $1,446,560
##  9  2011 Triathalon          591150 $591,150  
## 10  2012 Cross Country Race 3360800 $3,360,800
## # ... with 35 more rows
p<-sales_by_year_cat_2_tbl %>%
    
    # Set up x, y, fill 
    ggplot(aes(x = year, y = sales, fill = category_2)) +
    
    # Geometries
    geom_col() +
    geom_smooth(method = "lm", se = FALSE) +
    
    # Facet
    facet_wrap(~ category_2, ncol = 3, scales = "free_y") +
    
    # Formatting
    #theme_tq() +
    #scale_fill_tq() +
    scale_y_continuous(labels = scales::dollar) +
    labs(
        title = "Revenue by Year and Category 2",
        subtitle = "Each product category has an upward trend",
        x = "",
        y = "Revenue",
        fill = "Product Secondary Category"
    )
p

2

library(data.table)
## Warning: package 'data.table' was built under R version 3.5.3
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday,
##     week, yday, year
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
m.price=read.table("c:/hw0506/HW0506/2010-2018price.txt")
m.price<-m.price[,-2]
colnames(m.price)<-c("id", "","", "date", "close")
head(m.price)
##         id                      date      close
## 1 證券代碼 上市別 TSE產業別   年月日 收盤價(元)
## 2     1101    TSE        01 20100104      19.70
## 3     1102    TSE        01 20100104      20.24
## 4     1103    TSE        01 20100104      13.91
## 5     1104    TSE        01 20100104      11.32
## 6     1108    TSE        01 20100104       6.82
mprice.reorder = dcast(m.price,date~id)
## Using 'close' as value column. Use 'value.var' to override
dim(mprice.reorder)
## [1] 2224  930
write_rds(m.price,"2010-2018price.rds")