library(tidyquant)
## Warning: package 'tidyquant' was built under R version 3.5.3
## Loading required package: lubridate
## Warning: package 'lubridate' was built under R version 3.5.3
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
## Loading required package: PerformanceAnalytics
## Warning: package 'PerformanceAnalytics' was built under R version 3.5.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.5.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Warning: package 'quantmod' was built under R version 3.5.3
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.5.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages --------------------------------------------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.1.1 √ purrr 0.3.2
## √ tibble 2.1.1 √ dplyr 0.8.1
## √ tidyr 0.8.3 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks xts::first()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks xts::last()
## x lubridate::setdiff() masks base::setdiff()
## x lubridate::union() masks base::union()
tej <- read_tsv("w2_importing data/tej_day_price_2017_2018_UTF8.txt", col_names = TRUE)
## Parsed with column specification:
## cols(
## 證券代碼 = col_double(),
## 簡稱 = col_character(),
## TSE產業別 = col_character(),
## 上市別 = col_character(),
## 年月日 = col_double(),
## `開盤價(元)` = col_double(),
## `最高價(元)` = col_double(),
## `收盤價(元)` = col_double(),
## `最低價(元)` = col_double(),
## `成交值(千元)` = col_double(),
## `市值(百萬元)` = col_double(),
## `成交量(千股)` = col_double()
## )
saveRDS(tej, "w2_importing data/tej_d_2017_18.rds")
tej <- readRDS("w2_importing data/tej_d_2017_18.rds")
執行tidyquant
用read_tsv函數讀取txt檔資料
用saveRDS函數將資料存成rds檔
用readRDS函數讀取rds檔資料
glimpse(tej)
## Observations: 443,171
## Variables: 12
## $ 證券代碼 <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 120...
## $ 簡稱 <chr> "台泥", "亞泥", "嘉泥", "環泥", "幸福", "信大", "東泥", "味全", "...
## $ TSE產業別 <chr> "01", "01", "01", "01", "01", "01", "01", "02", "0...
## $ 上市別 <chr> "TSE", "TSE", "TSE", "TSE", "TSE", "TSE", "TSE", "...
## $ 年月日 <dbl> 20170103, 20170103, 20170103, 20170103, 20170103, ...
## $ `開盤價(元)` <dbl> 29.90, 24.91, 8.27, 21.41, 8.56, 10.11, 15.58, 17.5...
## $ `最高價(元)` <dbl> 29.90, 24.95, 8.27, 21.54, 8.57, 10.11, 15.63, 17.7...
## $ `收盤價(元)` <dbl> 29.64, 24.95, 8.27, 21.50, 8.57, 10.11, 15.54, 17.6...
## $ `最低價(元)` <dbl> 29.35, 24.76, 8.17, 21.37, 8.56, 10.11, 15.54, 17.5...
## $ `成交值(千元)` <dbl> 101450, 33550, 2411, 3705, 182, 42, 406, 11504, 171,...
## $ `市值(百萬元)` <dbl> 129779, 89078, 6748, 15610, 3703, 3789, 9009, 8931, ...
## $ `成交量(千股)` <dbl> 2890, 1271, 278, 150, 20, 4, 25, 653, 7, 2213, 19, 2...
tej1<-tej %>% select('證券代碼', '年月日', '收盤價(元)') %>%
rename(ID = '證券代碼', date = '年月日', close = '收盤價(元)') %>%
mutate(date = date %>% as.character %>% as.Date('%Y%m%d')) %>%
mutate(ID = ID %>% as.character) %>%
arrange(ID)
tej1
## # A tibble: 443,171 x 3
## ID date close
## <chr> <date> <dbl>
## 1 1101 2017-01-03 29.6
## 2 1101 2017-01-04 29.7
## 3 1101 2017-01-05 29.7
## 4 1101 2017-01-06 29.7
## 5 1101 2017-01-09 29.4
## 6 1101 2017-01-10 29.5
## 7 1101 2017-01-11 30.2
## 8 1101 2017-01-12 30.6
## 9 1101 2017-01-13 30.4
## 10 1101 2017-01-16 30.2
## # ... with 443,161 more rows
用select函數選擇需要的資料欄位
用rename函數將名稱重新命名
用mutate函數新增新的欄位
用arrange函數將資料排列整理
tej.3 <- tej1 %>%
arrange(ID) %>%
filter(ID %in% c("1101", "2317", "2330"))
tej.3
## # A tibble: 1,479 x 3
## ID date close
## <chr> <date> <dbl>
## 1 1101 2017-01-03 29.6
## 2 1101 2017-01-04 29.7
## 3 1101 2017-01-05 29.7
## 4 1101 2017-01-06 29.7
## 5 1101 2017-01-09 29.4
## 6 1101 2017-01-10 29.5
## 7 1101 2017-01-11 30.2
## 8 1101 2017-01-12 30.6
## 9 1101 2017-01-13 30.4
## 10 1101 2017-01-16 30.2
## # ... with 1,469 more rows
用arrange函數將資料排列整理
用filter函數選取需要的資料
%in% 將選取資料於ID裡面
tej.3 <- tej.3 %>%
group_by(ID) %>%
tq_mutate(select = c(close),
mutate_fun = SMA,
n = 5) %>%
rename(ma5 = SMA) %>%
tq_mutate(select = c(close),
mutate_fun = SMA,
n = 10) %>%
rename(ma10 = SMA) %>%
tq_mutate(select = c(close),
mutate_fun = SMA,
n = 20) %>%
rename(ma20 = SMA) %>%
ungroup()
tej.3
## # A tibble: 1,479 x 6
## ID date close ma5 ma10 ma20
## <chr> <date> <dbl> <dbl> <dbl> <dbl>
## 1 1101 2017-01-03 29.6 NA NA NA
## 2 1101 2017-01-04 29.7 NA NA NA
## 3 1101 2017-01-05 29.7 NA NA NA
## 4 1101 2017-01-06 29.7 NA NA NA
## 5 1101 2017-01-09 29.4 29.7 NA NA
## 6 1101 2017-01-10 29.5 29.6 NA NA
## 7 1101 2017-01-11 30.2 29.7 NA NA
## 8 1101 2017-01-12 30.6 29.9 NA NA
## 9 1101 2017-01-13 30.4 30.0 NA NA
## 10 1101 2017-01-16 30.2 30.2 29.9 NA
## # ... with 1,469 more rows
用select函數選擇收盤價、選擇簡單移動平均線
計算5日簡單移動平均線參數
用rename函數將名稱重新命名
計算10日簡單移動平均線參數
計算20日簡單移動平均線參數
tej.3 %>% select(-starts_with("ma")) %>%
spread(key = ID, value = close)
## # A tibble: 493 x 4
## date `1101` `2317` `2330`
## <date> <dbl> <dbl> <dbl>
## 1 2017-01-03 29.6 96.1 171.
## 2 2017-01-04 29.7 96.0 171.
## 3 2017-01-05 29.7 95.7 171.
## 4 2017-01-06 29.7 95.5 172.
## 5 2017-01-09 29.4 95.2 172.
## 6 2017-01-10 29.5 95.7 172.
## 7 2017-01-11 30.2 96.0 170.
## 8 2017-01-12 30.6 96.1 172.
## 9 2017-01-13 30.4 96.0 169.
## 10 2017-01-16 30.2 95.2 168.
## # ... with 483 more rows
將常資料轉為寬資料
library(tidyverse)
library(lubridate)
library(tidyquant)
bike_orderlines_tbl <- read_rds("data_wrangled_student/bike_orderlines.rds")
glimpse(bike_orderlines_tbl)
## Observations: 15,644
## Variables: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10...
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6,...
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2,...
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,...
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 533...
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 533...
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast o...
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain",...
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Ove...
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carb...
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Cl...
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City...
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY",...
執行tidyquant、lubridate、tidyquant
用readRDS函數讀取rds檔資料
order_date_tbl <- bike_orderlines_tbl %>%
select(order_date)
order_date_tbl %>%
pull(order_date) %>%
class()
## [1] "POSIXct" "POSIXt"
用select函數選擇需要的資料欄位
class() - 輸出的種類
order_date_tbl %>%
mutate(order_date_chr = as.character(order_date)) %>%
mutate(order_date_chr2 = order_date_chr %>% str_c(" 00:00:00")) %>%
mutate(order_date_date = order_date_chr %>% ymd()) %>%
mutate(order_date_dttm = order_date_chr2 %>% ymd_hms())
## # A tibble: 15,644 x 5
## order_date order_date_chr order_date_chr2 order_date_date
## <dttm> <chr> <chr> <date>
## 1 2011-01-07 00:00:00 2011-01-07 2011-01-07 00:~ 2011-01-07
## 2 2011-01-07 00:00:00 2011-01-07 2011-01-07 00:~ 2011-01-07
## 3 2011-01-10 00:00:00 2011-01-10 2011-01-10 00:~ 2011-01-10
## 4 2011-01-10 00:00:00 2011-01-10 2011-01-10 00:~ 2011-01-10
## 5 2011-01-10 00:00:00 2011-01-10 2011-01-10 00:~ 2011-01-10
## 6 2011-01-10 00:00:00 2011-01-10 2011-01-10 00:~ 2011-01-10
## 7 2011-01-10 00:00:00 2011-01-10 2011-01-10 00:~ 2011-01-10
## 8 2011-01-10 00:00:00 2011-01-10 2011-01-10 00:~ 2011-01-10
## 9 2011-01-10 00:00:00 2011-01-10 2011-01-10 00:~ 2011-01-10
## 10 2011-01-11 00:00:00 2011-01-11 2011-01-11 00:~ 2011-01-11
## # ... with 15,634 more rows, and 1 more variable: order_date_dttm <dttm>
用mutate函數新增新的欄位
as.character - 為文字
ymd()函數 - 將資料轉為年月日
ymd_hms() - 將資料轉為年月日小時 分鐘 秒
"06/01/18" %>% mdy() %>% class()
## [1] "Date"
"06/01/18 12:30:15" %>% mdy_hms() %>% class()
## [1] "POSIXct" "POSIXt"
"January 1, 1985" %>% mdy()
## [1] "1985-01-01"
"2011-01-01" %>% ymd() %>% year()
## [1] 2011
"2011-01-01" %>% ymd() %>% month(label = TRUE, abbr = FALSE)
## [1] 一月
## 12 Levels: 一月 < 二月 < 三月 < 四月 < 五月 < 六月 < 七月 < ... < 十二月
"2011-01-01" %>% ymd() %>% wday(label = TRUE, abbr = FALSE)
## [1] 星期六
## 7 Levels: 星期日 < 星期一 < 星期二 < 星期三 < 星期四 < ... < 星期六
"2011-01-01" %>% ymd() %>% day()
## [1] 1
# Helpers
now()
## [1] "2019-05-20 12:12:32 CST"
today()
## [1] "2019-05-20"
mdy()函數 - 將資料轉為月 日 年
ymd()函數 - 將資料轉為年 月 日
ymd_hms() - 將資料轉為年月日小時 分鐘 秒
abbr - 縮寫
today() + days(12)
## [1] "2019-06-01"
today() + ddays(12)
## [1] "2019-06-01"
today() + years(4)
## [1] "2023-05-20"
today() + dyears(4)
## [1] "2023-05-19"
簡單的運算式 - Period(一段時間)、Duration(期間)
i <- interval(today(), today() + ddays(12))
i / ddays(1)
## [1] 12
i / dminutes(1)
## [1] 17280
order_date_tbl %>%
mutate(today = today()) %>%
mutate(diff_days = interval(order_date, today) / ddays(1))
## # A tibble: 15,644 x 3
## order_date today diff_days
## <dttm> <date> <dbl>
## 1 2011-01-07 00:00:00 2019-05-20 3055
## 2 2011-01-07 00:00:00 2019-05-20 3055
## 3 2011-01-10 00:00:00 2019-05-20 3052
## 4 2011-01-10 00:00:00 2019-05-20 3052
## 5 2011-01-10 00:00:00 2019-05-20 3052
## 6 2011-01-10 00:00:00 2019-05-20 3052
## 7 2011-01-10 00:00:00 2019-05-20 3052
## 8 2011-01-10 00:00:00 2019-05-20 3052
## 9 2011-01-10 00:00:00 2019-05-20 3052
## 10 2011-01-11 00:00:00 2019-05-20 3051
## # ... with 15,634 more rows
interval / ddays =間隔多少天
interval / dminutes =間隔中的分鐘數
用mutate函數新增新的欄位
bike_sales_y_tbl <- bike_orderlines_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year = year(order_date)) %>%
group_by(year) %>%
summarize(sales = sum(total_price)) %>%
ungroup()
bike_sales_y_tbl
## # A tibble: 5 x 2
## year sales
## <dbl> <dbl>
## 1 2011 11292885
## 2 2012 12163075
## 3 2013 16480775
## 4 2014 13924085
## 5 2015 17171510
bike_sales_m_tbl <- bike_orderlines_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(
year = year(order_date),
month = month(order_date, label = TRUE, abbr = TRUE)
) %>%
group_by(year, month) %>%
summarize(sales = sum(total_price)) %>%
ungroup()
bike_sales_m_tbl
## # A tibble: 60 x 3
## year month sales
## <dbl> <ord> <dbl>
## 1 2011 一月 483015
## 2 2011 二月 1162075
## 3 2011 三月 659975
## 4 2011 四月 1827140
## 5 2011 五月 844170
## 6 2011 六月 1413445
## 7 2011 七月 1194430
## 8 2011 八月 679790
## 9 2011 九月 814720
## 10 2011 十月 734920
## # ... with 50 more rows
bike_sales_y_tbl - 計算年的資料
用select函數選擇需要的資料欄位
用mutate函數新增新的欄位
ymd()函數 - 將資料轉為年 月 日
用group_by函數 - 群組起來
用summarize函數加總
bike_sales_m_tbl - 計算月的資料
bike_orderlines_tbl %>%
select(order_date, total_price) %>%
mutate(order_date = ymd(order_date)) %>%
mutate(year_month = floor_date(order_date, unit = "month")) %>%
group_by(year_month) %>%
summarize(sales = sum(total_price))
## # A tibble: 60 x 2
## year_month sales
## <date> <dbl>
## 1 2011-01-01 483015
## 2 2011-02-01 1162075
## 3 2011-03-01 659975
## 4 2011-04-01 1827140
## 5 2011-05-01 844170
## 6 2011-06-01 1413445
## 7 2011-07-01 1194430
## 8 2011-08-01 679790
## 9 2011-09-01 814720
## 10 2011-10-01 734920
## # ... with 50 more rows
用select函數選擇需要的資料欄位
用mutate函數新增新的欄位
第一個日期floor_date
用group_by函數 - 群組起來
用summarize函數加總
bike_sales_y_tbl %>%
mutate(sales_lag_1 = lag(sales, n = 1)) %>%
mutate(sales_lag_1 = case_when(
is.na(sales_lag_1) ~ sales,
TRUE ~ sales_lag_1
)) %>%
mutate(diff_1 = sales - sales_lag_1) %>%
mutate(pct_diff_1 = diff_1 / sales_lag_1) %>%
mutate(pct_diff_1_chr = scales::percent(pct_diff_1))
## # A tibble: 5 x 6
## year sales sales_lag_1 diff_1 pct_diff_1 pct_diff_1_chr
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011 11292885 11292885 0 0 0.0%
## 2 2012 12163075 11292885 870190 0.0771 7.7%
## 3 2013 16480775 12163075 4317700 0.355 35.5%
## 4 2014 13924085 16480775 -2556690 -0.155 -15.5%
## 5 2015 17171510 13924085 3247425 0.233 23.3%
calculate_pct_diff <- function(data) {
data %>%
mutate(sales_lag_1 = lag(sales, n = 1)) %>%
mutate(sales_lag_1 = case_when(
is.na(sales_lag_1) ~ sales,
TRUE ~ sales_lag_1
)) %>%
mutate(diff_1 = sales - sales_lag_1) %>%
mutate(pct_diff_1 = diff_1 / sales_lag_1) %>%
mutate(pct_diff_1_chr = scales::percent(pct_diff_1))
}
bike_sales_m_tbl %>%
calculate_pct_diff()
## # A tibble: 60 x 7
## year month sales sales_lag_1 diff_1 pct_diff_1 pct_diff_1_chr
## <dbl> <ord> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011 一月 483015 483015 0 0 0%
## 2 2011 二月 1162075 483015 679060 1.41 141%
## 3 2011 三月 659975 1162075 -502100 -0.432 -43%
## 4 2011 四月 1827140 659975 1167165 1.77 177%
## 5 2011 五月 844170 1827140 -982970 -0.538 -54%
## 6 2011 六月 1413445 844170 569275 0.674 67%
## 7 2011 七月 1194430 1413445 -219015 -0.155 -15%
## 8 2011 八月 679790 1194430 -514640 -0.431 -43%
## 9 2011 九月 814720 679790 134930 0.198 20%
## 10 2011 十月 734920 814720 -79800 -0.0979 -10%
## # ... with 50 more rows
用mutate函數新增新的欄位並處理NA資料
lag - 往後一期
percent(pct_diff_1) - 轉回百分比
calculate_pct_diff <- function(data) {… - 固定的程式(百分比)
case_when - 向EXCEL裡的IF EALES
bike_sales_y_tbl %>%
mutate(sales_2011 = first(sales)) %>%
mutate(diff_2011 = sales - sales_2011) %>%
mutate(pct_diff_2011 = diff_2011 / sales_2011) %>%
mutate(pct_diff_2011_chr = scales::percent(pct_diff_2011))
## # A tibble: 5 x 6
## year sales sales_2011 diff_2011 pct_diff_2011 pct_diff_2011_chr
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011 11292885 11292885 0 0 0.0%
## 2 2012 12163075 11292885 870190 0.0771 7.7%
## 3 2013 16480775 11292885 5187890 0.459 45.9%
## 4 2014 13924085 11292885 2631200 0.233 23.3%
## 5 2015 17171510 11292885 5878625 0.521 52.1%
bike_sales_m_tbl %>%
group_by(year) %>%
mutate(sales_jan = first(sales)) %>%
mutate(
diff_jan = sales - sales_jan,
pct_diff_jan = diff_jan / sales_jan,
pct_diff_jan_chr = scales::percent(pct_diff_jan)
)
## # A tibble: 60 x 7
## # Groups: year [5]
## year month sales sales_jan diff_jan pct_diff_jan pct_diff_jan_chr
## <dbl> <ord> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011 一月 483015 483015 0 0 0%
## 2 2011 二月 1162075 483015 679060 1.41 141%
## 3 2011 三月 659975 483015 176960 0.366 37%
## 4 2011 四月 1827140 483015 1344125 2.78 278%
## 5 2011 五月 844170 483015 361155 0.748 75%
## 6 2011 六月 1413445 483015 930430 1.93 193%
## 7 2011 七月 1194430 483015 711415 1.47 147%
## 8 2011 八月 679790 483015 196775 0.407 41%
## 9 2011 九月 814720 483015 331705 0.687 69%
## 10 2011 十月 734920 483015 251905 0.522 52%
## # ... with 50 more rows
bike_sales_y_tbl - 處理計算年的資料
用mutate函數新增新的欄
用group_by函數 - 群組起來
bike_sales_m_tbl - 處理計算月的資料
bike_sales_y_tbl %>%
mutate(cumulative_sales = cumsum(sales)) %>%
mutate(cumulative_sales_pct = cumulative_sales / sum(sales)) %>%
mutate(cumulative_sales_pct_chr = cumulative_sales_pct %>% scales::percent())
## # A tibble: 5 x 5
## year sales cumulative_sales cumulative_sales_p~ cumulative_sales_pct~
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011 11292885 11292885 0.159 15.9%
## 2 2012 12163075 23455960 0.330 33.0%
## 3 2013 16480775 39936735 0.562 56.2%
## 4 2014 13924085 53860820 0.758 75.8%
## 5 2015 17171510 71032330 1 100.0%
bike_sales_m_tbl %>%
group_by(year) %>%
mutate(cumulative_sales = cumsum(sales)) %>%
mutate(cumulative_sales_pct = cumulative_sales / sum(sales)) %>%
mutate(cumulative_sales_pct_chr = scales::percent(cumulative_sales_pct))
## # A tibble: 60 x 6
## # Groups: year [5]
## year month sales cumulative_sales cumulative_sales~ cumulative_sales~
## <dbl> <ord> <dbl> <dbl> <dbl> <chr>
## 1 2011 一月 483015 483015 0.0428 4.3%
## 2 2011 二月 1162075 1645090 0.146 14.6%
## 3 2011 三月 659975 2305065 0.204 20.4%
## 4 2011 四月 1827140 4132205 0.366 36.6%
## 5 2011 五月 844170 4976375 0.441 44.1%
## 6 2011 六月 1413445 6389820 0.566 56.6%
## 7 2011 七月 1194430 7584250 0.672 67.2%
## 8 2011 八月 679790 8264040 0.732 73.2%
## 9 2011 九月 814720 9078760 0.804 80.4%
## 10 2011 十月 734920 9813680 0.869 86.9%
## # ... with 50 more rows
用mutate函數新增新的欄
用group_by函數 - 群組起來
bike_sales_m_tbl %>%
mutate(roll_mean_3 = rollmean(sales, k = 3, na.pad = TRUE, align = "right", fill = NA)) %>%
mutate(roll_mean_6 = rollmean(sales, k = 6, na.pad = TRUE, align = "right", fill = NA))
## # A tibble: 60 x 5
## year month sales roll_mean_3 roll_mean_6
## <dbl> <ord> <dbl> <dbl> <dbl>
## 1 2011 一月 483015 NA NA
## 2 2011 二月 1162075 NA NA
## 3 2011 三月 659975 768355 NA
## 4 2011 四月 1827140 1216397. NA
## 5 2011 五月 844170 1110428. NA
## 6 2011 六月 1413445 1361585 1064970
## 7 2011 七月 1194430 1150682. 1183539.
## 8 2011 八月 679790 1095888. 1103158.
## 9 2011 九月 814720 896313. 1128949.
## 10 2011 十月 734920 743143. 946912.
## # ... with 50 more rows
用mutate函數新增新的欄
rollmean類似SMA取線
bike_orderlines_tbl %>%
mutate(order_date = ymd(order_date)) %>%
filter(order_date %>% between(left = ymd("2012-01-01"), right = ymd("2013-12-31")))
## # A tibble: 6,311 x 13
## order_date order_id order_line quantity price total_price model
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2012-01-03 323 1 1 3200 3200 Slic~
## 2 2012-01-03 323 2 1 2130 2130 Fat ~
## 3 2012-01-03 323 3 1 1620 1620 Beas~
## 4 2012-01-03 323 4 2 1410 2820 CAAD~
## 5 2012-01-03 323 5 1 3200 3200 Jeky~
## 6 2012-01-03 324 1 1 3200 3200 Trig~
## 7 2012-01-03 324 2 2 1950 3900 CAAD~
## 8 2012-01-03 324 3 1 815 815 CAAD~
## 9 2012-01-03 324 4 1 815 815 Trai~
## 10 2012-01-03 324 5 1 2880 2880 F-Si~
## # ... with 6,301 more rows, and 6 more variables: category_1 <chr>,
## # category_2 <chr>, frame_material <chr>, bikeshop_name <chr>,
## # city <chr>, state <chr>
bike_orderlines_tbl %>%
mutate(order_date = ymd(order_date)) %>%
filter(year(order_date) %in% c(2012, 2013))
## # A tibble: 6,311 x 13
## order_date order_id order_line quantity price total_price model
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2012-01-03 323 1 1 3200 3200 Slic~
## 2 2012-01-03 323 2 1 2130 2130 Fat ~
## 3 2012-01-03 323 3 1 1620 1620 Beas~
## 4 2012-01-03 323 4 2 1410 2820 CAAD~
## 5 2012-01-03 323 5 1 3200 3200 Jeky~
## 6 2012-01-03 324 1 1 3200 3200 Trig~
## 7 2012-01-03 324 2 2 1950 3900 CAAD~
## 8 2012-01-03 324 3 1 815 815 CAAD~
## 9 2012-01-03 324 4 1 815 815 Trai~
## 10 2012-01-03 324 5 1 2880 2880 F-Si~
## # ... with 6,301 more rows, and 6 more variables: category_1 <chr>,
## # category_2 <chr>, frame_material <chr>, bikeshop_name <chr>,
## # city <chr>, state <chr>
用mutate函數新增新的欄
用filter函數設定資料
%in% 將選取資料於year(order_date)裡面