# Attach BBmisc first: lib() and suppressAll() used below come from it.
# library() is used rather than require() so that a missing BBmisc stops the
# script here instead of failing later with "could not find function".
suppressPackageStartupMessages(library('BBmisc'))

## Load packages
pkg <- c('devtools', 'tidyverse', 'timetk', 'lubridate', 'plyr', 'dplyr',
         'magrittr', 'purrr', 'stringr', 'reshape', 'formattable',
         'microbenchmark', 'knitr', 'kableExtra', 'VIM', 'mice', 'miceAdds',
         'mi', 'mitools', 'Amelia', 'missForest', 'Hmisc', 'DMwR', 'imputeTS',
         'tidyimpute', 'mtsdi', 'xts', 'forecast', 'marima', 'missMDA')
suppressAll(lib(pkg))

# Source the project-local helper scripts stored under ./function/.
funs <- c('convertOHLC.R')
l_ply(funs, function(x) source(paste0('./function/', x)))

# Algorithm names passed to na.seadec(algorithm = ...) later in the report.
algo <- c('interpolation', 'locf', 'mean', 'random', 'kalman', 'ma')

# Remove the temporaries so they do not linger in the workspace.
rm(pkg, funs)
由于在科研《binary.com Interview Question I - Interday High Frequency Trading Models Comparison》测试高频率量化交易时,从 fxcm/MarketData 下载的数据并不完整[1],而《binary.com 面试试题 I - 单变量数据缺失值管理》尝试弥补缺失值不果,因为单变量无法辨认开市价、最高价、最低价和闭市价之间的关系。
auto.arima()
来弥补缺失值。imputeTS
: Time Series Missing Value Imputation in R imputeTS
程序包imputeTS - Time Series Missing Value Imputation in R讲述mice
、Amelia
、missMDA
与VIM
都是多变量弥补数据程序包,而imputeTS
乃单变量弥补数据程序包,不过程序包中的seadec()
函数乃弥补季节性数据。
Simple | Imputation Imputation | Plots & Statistics | Datasets |
---|---|---|---|
na.locf | na.interpolation | plotNA.distribution | tsAirgap |
na.mean | na.kalman | plotNA.distributionBar | tsAirgapComplete |
na.random | na.ma | plotNA.gapsize | tsHeating |
na.replace | na.seadec | plotNA.imputations | tsHeatingComplete |
na.remove | na.seasplit | statsNA | tsNH4 |
tsNH4Complete |
Table 1: General Overview imputeTS package
Function | Option | Description |
---|---|---|
na.interpolation | linear | Imputation by Linear Interpolation |
spline | Imputation by Spline Interpolation | |
stine | Imputation by Stineman Interpolation | |
na.kalman | StructTS | Imputation by Structural Model & Kalman Smoothing |
auto.arima | Imputation by ARIMA State Space Representation & Kalman Sm. | |
na.locf | locf | Imputation by Last Observation Carried Forward |
nocb | Imputation by Next Observation Carried Backward | |
na.ma | simple | Missing Value Imputation by Simple Moving Average |
linear | Missing Value Imputation by Linear Weighted Moving Average | |
exponential | Missing Value Imputation by Exponential Weighted Moving Average | |
na.mean | mean | Missing Value Imputation by Mean Value |
median | Missing Value Imputation by Median Value | |
mode | Missing Value Imputation by Mode Value | |
na.random | Missing Value Imputation by Random Sample | |
na.replace | Replace Missing Values by a Defined Value | |
na.seadec | Seasonally Decomposed Missing Value Imputation | |
na.seasplit | Seasonally Splitted Missing Value Imputation | |
na.remove | Remove Missing Values |
Table 3: Overview Imputation Algorithms
Amelia
程序包Amelia II: A Program for Missing Data介绍Amelia
程序包,而AMELIA II - A Program for Missing Data教导如何使用该程序包。Error in as.POSIXct.numeric(value) : ‘origin’ must be supplied #18显示时间变量无法弥补,故此对于Amelia
缺失值,僕得省略掉时间变量,仅设置价格变量为缺失值而已。
mice
程序包可以使用lm
函数将弥补数据线性化,tidyr
程序包中有个fill()
函数可以。而dendextend::na_locf()
会比zoo::na.locf()
高效率,不过弥补数据时会遇到一些参数问题。
和之前的单变量一样,首先僕随机导入每分钟为1个时间单位的数据。
Error in optim(init[mask], getLike, method = "L-BFGS-B", lower = rep(0, : L-BFGS-B needs finite values of 'fn'
17. optim(init[mask], getLike, method = "L-BFGS-B", lower = rep(0, np + 1L), upper = rep(Inf, np + 1L), control = optim.control)
16. StructTS(data, ...)
15. na.kalman(data, ...)
14. apply.base.algorithm(data, algorithm = algorithm, ...)
13. .f(.x[[i]], ...)
12. map(., na.seadec, algorithm = x)
11. function_list[[i]](value)
10. freduce(value, `_function_list`)
9. `_fseq`(`_lhs`)
8. eval(quote(`_fseq`(`_lhs`)), env, env)
7. eval(quote(`_fseq`(`_lhs`)), env, env)
6. withVisible(eval(quote(`_fseq`(`_lhs`)), env, env))
5. data_m1_NA %>% dplyr::select(starts_with("Ask"), starts_with("Bid")) %>% map(na.seadec, algorithm = x) %>% as.tibble
4. FUN(X[[i]], ...)
3. lapply(pieces, .fun, ...)
2. structure(lapply(pieces, .fun, ...), dim = dim(pieces))
1. llply(algo, function(x) { data_m1_NA %>% dplyr::select(starts_with("Ask"), starts_with("Bid")) %>% map(na.seadec, algorithm = x) %>% as.tibble })
由于频频出现错误信息#imputeTS/issues/26,于此僕使用sort(sample(length(fls), 1))随机筛选1个文件。
pth <- 'C:/Users/scibr/Documents/GitHub/scibrokes/real-time-fxcm/data/USDJPY/'

# Weekly 1-minute files are matched by name (Y<year>W<week>_m1.rds).
# The week digits use [0-9], the same class the file-name parser below uses,
# so weeks containing a zero (10, 20, 30, ...) are no longer skipped; the dot
# before the extension is escaped so it only matches a literal '.'.
fls <- list.files(pth, pattern = '^Y[0-9]{4}W[0-9]{1,2}_m1\\.rds$')
if (length(fls) == 0) {
  stop('No weekly *_m1.rds files found in ', pth, call. = FALSE)
}

## 1-minute data
## Errors occur frequently, so only 1 file is picked at random with
## sort(sample(length(fls), 1)).
data_m1 <- llply(fls[sort(sample(length(fls), 1))], function(x) {
  # Read one weekly file, rename DateTime to index and parse it
  # (month-day-year text) into a date-time.
  y <- readRDS(paste0(pth, x)) %>%
    dplyr::rename(index = DateTime) %>%
    mutate(index = index %>% mdy_hms %>%
             .POSIXct(tz = 'Europe/Athens') %>%
             force_tz())

  # Year and week number taken from the file name.
  yw <- x %>% str_extract_all('Y[0-9]{4}W[0-9]{1,2}') %>%
    str_split_fixed('[A-Z]{1}', 3) %>% .[,-1]

  # NOTE(review): substr() is asked for characters that start past the end
  # of the string (nchar + 2), so nch looks like it is always '' and the
  # '23' branch below never fires -- confirm the intended positions.
  nch <- y$index[1] %>% substr(nchar(.)+2, nchar(.)+3)

  # Attach year/week, shift the index by one hour when nch is '23', and
  # drop the temporary nch column again.
  y %>%
    mutate(
      year = as.numeric(yw[1]), week = as.numeric(yw[2]),
      nch = nch, index = if_else(
        nch == '23', index + hours(1), index)) %>%
    dplyr::select(-nch)
}) %>% bind_rows %>% tbl_df %>% arrange(index)
# Report the number of rows and columns of the loaded 1-minute data.
dim(data_m1)
## [1] 7149 11
# Print the tibble to inspect the first rows.
data_m1
## # A tibble: 7,149 x 11
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:00:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:01:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 00:02:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 00:03:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 00:04:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 00:05:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 00:06:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 00:07:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 00:08:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 00:09:00 104. 104. 104. 104. 104. 104.
## # ... with 7,139 more rows, and 4 more variables: AskLow <dbl>,
## # AskClose <dbl>, year <dbl>, week <dbl>
## 检验原始数据是否存在偏差。
## Check whether the raw data contain inconsistent Ask prices.
# Each flag is 1 when that price breaks the Low <= Open/Close <= High
# ordering of its own row, and 0 otherwise.
data_m1 <- data_m1 %>%
  mutate(
    bias.open  = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
    bias.high  = as.numeric(AskHigh < AskOpen | AskHigh < AskLow |
                              AskHigh < AskClose),
    bias.low   = as.numeric(AskLow > AskOpen | AskLow > AskHigh |
                              AskLow > AskClose),
    bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow))

# Show every flagged row in a scrollable HTML table.
dplyr::filter(
  data_m1,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1) %>%
  kable(caption = 'Bias Imputation') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%', height = '400px')
index | BidOpen | BidHigh | BidLow | BidClose | AskOpen | AskHigh | AskLow | AskClose | year | week | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
接着,导入Tick数据[2],并且转为以每分钟为1个时间单位的数据。
pth <- 'C:/Users/scibr/Documents/GitHub/scibrokes/real-time-fxcm/data/USDJPY/'

# Weekly tick files are matched by name (Y<year>W<week>.rds).
# The week digits use [0-9], the same class the file-name parser below uses,
# so weeks containing a zero (10, 20, 30, ...) are no longer skipped; the dot
# before the extension is escaped so it only matches a literal '.'.
fls <- list.files(pth, pattern = '^Y[0-9]{4}W[0-9]{1,2}\\.rds$')
if (length(fls) == 0) {
  stop('No weekly tick .rds files found in ', pth, call. = FALSE)
}

## Convert tick data to 1-minute data
## Errors occur frequently, so only 1 file is picked at random with
## sort(sample(length(fls), 1)).
data_tm1 <- llply(fls[sort(sample(length(fls), 1))], function(x) {
  # convertOHLC() is the project helper sourced from ./function/;
  # presumably it aggregates ticks into 1-minute OHLC bars -- see that file.
  y <- readRDS(paste0(pth, x)) %>%
    convertOHLC(combine = TRUE)

  # Year and week number taken from the file name.
  yw <- x %>% str_extract_all('Y[0-9]{4}W[0-9]{1,2}') %>%
    str_split_fixed('[A-Z]{1}', 3) %>% .[,-1]

  y %>% mutate(
    year = as.numeric(yw[1]), week = as.numeric(yw[2]))
}) %>% bind_rows %>% tbl_df %>% arrange(index)
# Report the number of rows and columns of the tick-derived 1-minute data.
dim(data_tm1)
## [1] 1628 11
# Print the tibble to inspect the first rows.
data_tm1
## # A tibble: 1,628 x 11
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:02:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:03:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:04:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:05:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 00:06:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 00:07:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 00:08:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 00:09:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 00:11:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 00:17:00 111. 111. 111. 111. 111. 111.
## # ... with 1,618 more rows, and 4 more variables: AskLow <dbl>,
## # AskClose <dbl>, year <dbl>, week <dbl>
## 检验原始数据是否存在偏差。
## Check whether the raw data contain inconsistent Ask prices.
# Each flag is 1 when that price breaks the Low <= Open/Close <= High
# ordering of its own row, and 0 otherwise.
data_tm1 <- data_tm1 %>%
  mutate(
    bias.open  = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
    bias.high  = as.numeric(AskHigh < AskOpen | AskHigh < AskLow |
                              AskHigh < AskClose),
    bias.low   = as.numeric(AskLow > AskOpen | AskLow > AskHigh |
                              AskLow > AskClose),
    bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow))

# Show every flagged row in a scrollable HTML table.
dplyr::filter(
  data_tm1,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1) %>%
  kable(caption = 'Bias Imputation') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%', height = '400px')
index | BidOpen | BidHigh | BidLow | BidClose | AskOpen | AskHigh | AskLow | AskClose | year | week | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
现在尝试随机设置缺失值。
# Keep the timestamp plus the eight Bid/Ask OHLC columns, then let prodNA()
# replace a random 1% of the cells (noNA = 0.01) with NA.
data_m1_NA <- prodNA(
  dplyr::select(
    data_m1, index,
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose),
  noNA = 0.01)
# Print the data with the injected missing values.
data_m1_NA
## # A tibble: 7,149 x 9
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:00:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:01:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 00:02:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 00:03:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 00:04:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 00:05:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 00:06:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 00:07:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 00:08:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 00:09:00 104. 104. 104. 104. 104. 104.
## # ... with 7,139 more rows, and 2 more variables: AskLow <dbl>,
## # AskClose <dbl>
# Tabulate the missing-data patterns: one row per observed/missing
# combination (1 = observed, 0 = missing), with its row count on the left.
data_m1_NA %>% md.pattern
## AskClose BidHigh BidLow AskOpen index AskLow BidOpen BidClose AskHigh
## 6528 1 1 1 1 1 1 1 1 1
## 79 1 1 1 1 1 1 1 1 0
## 78 1 1 1 1 1 1 1 0 1
## 72 1 1 1 1 1 1 0 1 1
## 1 1 1 1 1 1 1 0 1 0
## 2 1 1 1 1 1 1 0 0 1
## 71 1 1 1 1 1 0 1 1 1
## 66 1 1 1 1 0 1 1 1 1
## 62 1 1 1 0 1 1 1 1 1
## 1 1 1 1 0 1 1 1 1 0
## 1 1 1 1 0 1 0 1 1 1
## 1 1 1 1 0 1 0 1 0 1
## 1 1 1 1 0 0 1 1 1 1
## 62 1 1 0 1 1 1 1 1 1
## 1 1 1 0 1 1 1 1 1 0
## 1 1 1 0 1 0 1 1 1 1
## 1 1 1 0 0 1 1 1 1 1
## 56 1 0 1 1 1 1 1 1 1
## 1 1 0 1 1 1 1 1 1 0
## 1 1 0 1 1 1 1 1 0 1
## 1 1 0 1 1 1 1 0 1 1
## 1 1 0 1 1 0 1 1 1 1
## 1 1 0 0 1 1 1 1 1 1
## 54 0 1 1 1 1 1 1 1 1
## 1 0 1 1 1 1 1 1 1 0
## 1 0 1 1 1 1 1 1 0 1
## 2 0 1 1 1 1 0 1 1 1
## 1 0 1 1 1 0 1 1 1 1
## 1 0 1 1 0 1 1 1 1 1
## 60 61 66 68 70 75 76 83 84
##
## 6528 0
## 79 1
## 78 1
## 72 1
## 1 2
## 2 2
## 71 1
## 66 1
## 62 1
## 1 2
## 1 2
## 1 3
## 1 2
## 62 1
## 1 2
## 1 2
## 1 2
## 56 1
## 1 2
## 1 2
## 1 2
## 1 2
## 1 2
## 54 1
## 1 2
## 1 2
## 2 2
## 1 2
## 1 2
## 643
# Pairwise observed/missing counts for every pair of columns
# (the $rr, $rm, $mr and $mm matrices in the output).
data_m1_NA %>% md.pairs
## $rr
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow
## index 7079 7003 7019 7014 6996 7012 6995 7004
## BidOpen 7003 7073 7013 7007 6992 7005 6990 6998
## BidHigh 7019 7013 7088 7023 7006 7020 7005 7013
## BidLow 7014 7007 7023 7083 7000 7016 7000 7008
## BidClose 6996 6992 7006 7000 7066 6999 6982 6992
## AskOpen 7012 7005 7020 7016 6999 7081 6998 7008
## AskHigh 6995 6990 7005 7000 6982 6998 7065 6990
## AskLow 7004 6998 7013 7008 6992 7008 6990 7074
## AskClose 7020 7013 7028 7023 7007 7022 7006 7016
## AskClose
## index 7020
## BidOpen 7013
## BidHigh 7028
## BidLow 7023
## BidClose 7007
## AskOpen 7022
## AskHigh 7006
## AskLow 7016
## AskClose 7089
##
## $rm
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow
## index 0 76 60 65 83 67 84 75
## BidOpen 70 0 60 66 81 68 83 75
## BidHigh 69 75 0 65 82 68 83 75
## BidLow 69 76 60 0 83 67 83 75
## BidClose 70 74 60 66 0 67 84 74
## AskOpen 69 76 61 65 82 0 83 73
## AskHigh 70 75 60 65 83 67 0 75
## AskLow 70 76 61 66 82 66 84 0
## AskClose 69 76 61 66 82 67 83 73
## AskClose
## index 59
## BidOpen 60
## BidHigh 60
## BidLow 60
## BidClose 59
## AskOpen 59
## AskHigh 59
## AskLow 58
## AskClose 0
##
## $mr
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow
## index 0 70 69 69 70 69 70 70
## BidOpen 76 0 75 76 74 76 75 76
## BidHigh 60 60 0 60 60 61 60 61
## BidLow 65 66 65 0 66 65 65 66
## BidClose 83 81 82 83 0 82 83 82
## AskOpen 67 68 68 67 67 0 67 66
## AskHigh 84 83 83 83 84 83 0 84
## AskLow 75 75 75 75 74 73 75 0
## AskClose 59 60 60 60 59 59 59 58
## AskClose
## index 69
## BidOpen 76
## BidHigh 61
## BidLow 66
## BidClose 82
## AskOpen 67
## AskHigh 83
## AskLow 73
## AskClose 0
##
## $mm
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow
## index 70 0 1 1 0 1 0 0
## BidOpen 0 76 1 0 2 0 1 0
## BidHigh 1 1 61 1 1 0 1 0
## BidLow 1 0 1 66 0 1 1 0
## BidClose 0 2 1 0 83 1 0 1
## AskOpen 1 0 0 1 1 68 1 2
## AskHigh 0 1 1 1 0 1 84 0
## AskLow 0 0 0 0 1 2 0 75
## AskClose 1 0 0 0 1 1 1 2
## AskClose
## index 1
## BidOpen 0
## BidHigh 0
## BidLow 0
## BidClose 1
## AskOpen 1
## AskHigh 1
## AskLow 2
## AskClose 60
# Keep the timestamp plus the eight Bid/Ask OHLC columns, then let prodNA()
# replace a random 1% of the cells (noNA = 0.01) with NA.
data_tm1_NA <- prodNA(
  dplyr::select(
    data_tm1, index,
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose),
  noNA = 0.01)
# Print the data with the injected missing values.
data_tm1_NA
## # A tibble: 1,628 x 9
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:02:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:03:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:04:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:05:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 00:06:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 00:07:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 00:08:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 00:09:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 00:11:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 00:17:00 111. 111. NA 111. 111. 111.
## # ... with 1,618 more rows, and 2 more variables: AskLow <dbl>,
## # AskClose <dbl>
# Tabulate the missing-data patterns: one row per observed/missing
# combination (1 = observed, 0 = missing), with its row count on the left.
data_tm1_NA %>% md.pattern
## BidHigh BidClose AskLow BidOpen BidLow AskHigh index AskClose AskOpen
## 1485 1 1 1 1 1 1 1 1 1
## 18 1 1 1 1 1 1 1 1 0
## 18 1 1 1 1 1 1 1 0 1
## 17 1 1 1 1 1 1 0 1 1
## 15 1 1 1 1 1 0 1 1 1
## 1 1 1 1 1 1 0 0 1 1
## 16 1 1 1 1 0 1 1 1 1
## 1 1 1 1 1 0 1 1 1 0
## 17 1 1 1 0 1 1 1 1 1
## 16 1 1 0 1 1 1 1 1 1
## 12 1 0 1 1 1 1 1 1 1
## 11 0 1 1 1 1 1 1 1 1
## 1 0 1 1 1 1 0 1 1 1
## 12 12 16 17 17 17 18 18 19
##
## 1485 0
## 18 1
## 18 1
## 17 1
## 15 1
## 1 2
## 16 1
## 1 2
## 17 1
## 16 1
## 12 1
## 11 1
## 1 2
## 146
# Pairwise observed/missing counts for every pair of columns
# (the $rr, $rm, $mr and $mm matrices in the output).
data_tm1_NA %>% md.pairs
## $rr
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow
## index 1610 1593 1598 1593 1598 1591 1594 1594
## BidOpen 1593 1611 1599 1594 1599 1592 1594 1595
## BidHigh 1598 1599 1616 1599 1604 1597 1600 1600
## BidLow 1593 1594 1599 1611 1599 1593 1594 1595
## BidClose 1598 1599 1604 1599 1616 1597 1599 1600
## AskOpen 1591 1592 1597 1593 1597 1609 1592 1593
## AskHigh 1594 1594 1600 1594 1599 1592 1611 1595
## AskLow 1594 1595 1600 1595 1600 1593 1595 1612
## AskClose 1592 1593 1598 1593 1598 1591 1593 1594
## AskClose
## index 1592
## BidOpen 1593
## BidHigh 1598
## BidLow 1593
## BidClose 1598
## AskOpen 1591
## AskHigh 1593
## AskLow 1594
## AskClose 1610
##
## $rm
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow
## index 0 17 12 17 12 19 16 16
## BidOpen 18 0 12 17 12 19 17 16
## BidHigh 18 17 0 17 12 19 16 16
## BidLow 18 17 12 0 12 18 17 16
## BidClose 18 17 12 17 0 19 17 16
## AskOpen 18 17 12 16 12 0 17 16
## AskHigh 17 17 11 17 12 19 0 16
## AskLow 18 17 12 17 12 19 17 0
## AskClose 18 17 12 17 12 19 17 16
## AskClose
## index 18
## BidOpen 18
## BidHigh 18
## BidLow 18
## BidClose 18
## AskOpen 18
## AskHigh 18
## AskLow 18
## AskClose 0
##
## $mr
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow
## index 0 18 18 18 18 18 17 18
## BidOpen 17 0 17 17 17 17 17 17
## BidHigh 12 12 0 12 12 12 11 12
## BidLow 17 17 17 0 17 16 17 17
## BidClose 12 12 12 12 0 12 12 12
## AskOpen 19 19 19 18 19 0 19 19
## AskHigh 16 17 16 17 17 17 0 17
## AskLow 16 16 16 16 16 16 16 0
## AskClose 18 18 18 18 18 18 18 18
## AskClose
## index 18
## BidOpen 17
## BidHigh 12
## BidLow 17
## BidClose 12
## AskOpen 19
## AskHigh 17
## AskLow 16
## AskClose 0
##
## $mm
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow
## index 18 0 0 0 0 0 1 0
## BidOpen 0 17 0 0 0 0 0 0
## BidHigh 0 0 12 0 0 0 1 0
## BidLow 0 0 0 17 0 1 0 0
## BidClose 0 0 0 0 12 0 0 0
## AskOpen 0 0 0 1 0 19 0 0
## AskHigh 1 0 1 0 0 0 17 0
## AskLow 0 0 0 0 0 0 0 16
## AskClose 0 0 0 0 0 0 0 0
## AskClose
## index 0
## BidOpen 0
## BidHigh 0
## BidLow 0
## BidClose 0
## AskOpen 0
## AskHigh 0
## AskLow 0
## AskClose 18
auto.arima
- auto.arima using xreg and Forecasting Several ts Together
- auto.arima Forecast with Multivariate xreg - unexpected Results
- auto.arima Warns NaNs Produced on Std Error

tttt <- data_m1_NA[-1] %>% amelia
## -- Imputation 1 --
##
## 1 2
##
## -- Imputation 2 --
##
## 1 2
##
## -- Imputation 3 --
##
## 1 2
##
## -- Imputation 4 --
##
## 1 2
##
## -- Imputation 5 --
##
## 1 2
# For every Amelia imputation, mark whether the Ask (VA) and Bid (VB) prices
# of a row are internally consistent (1) or not (0), and return only the rows
# where at least one side is inconsistent.
llply(tttt$imputations, function(imp) {
  checked <- mutate(
    imp,
    VA = as.numeric(AskOpen <= AskHigh & AskOpen >= AskLow &
                      AskClose <= AskHigh & AskClose >= AskLow &
                      AskHigh >= AskLow),
    VB = as.numeric(BidOpen <= BidHigh & BidOpen >= BidLow &
                      BidClose <= BidHigh & BidClose >= BidLow &
                      BidHigh >= BidLow))
  dplyr::filter(checked, VA == 0 | VB == 0)
})
## $imp1
## # A tibble: 114 x 10
## BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow AskClose VA
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 104. 104. 104. 104. 104. 104. 104. 104. 0
## 2 104. 104. 104. 104. 104. 104. 104. 104. 0
## 3 104. 104. 104. 104. 104. 104. 104. 104. 1
## 4 104. 104. 104. 104. 104. 104. 104. 104. 1
## 5 104. 104. 104. 104. 104. 104. 104. 104. 0
## 6 104. 104. 104. 104. 104. 104. 104. 104. 0
## 7 104. 104. 104. 104. 104. 104. 104. 104. 1
## 8 104. 104. 104. 104. 104. 104. 104. 104. 1
## 9 104. 104. 104. 104. 104. 104. 104. 104. 1
## 10 104. 104. 104. 104. 104. 104. 104. 104. 1
## # ... with 104 more rows, and 1 more variable: VB <dbl>
##
## $imp2
## # A tibble: 106 x 10
## BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow AskClose VA
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 104. 104. 104. 104. 104. 104. 104. 104. 0
## 2 104. 104. 104. 104. 104. 104. 104. 104. 0
## 3 104. 104. 104. 104. 104. 104. 104. 104. 1
## 4 104. 104. 104. 104. 104. 104. 104. 104. 0
## 5 104. 104. 104. 104. 104. 104. 104. 104. 1
## 6 104. 104. 104. 104. 104. 104. 104. 104. 1
## 7 104. 104. 104. 104. 104. 104. 104. 104. 1
## 8 104. 104. 104. 104. 104. 104. 104. 104. 1
## 9 104. 104. 104. 104. 104. 104. 104. 104. 1
## 10 104. 104. 104. 104. 104. 104. 104. 104. 1
## # ... with 96 more rows, and 1 more variable: VB <dbl>
##
## $imp3
## # A tibble: 107 x 10
## BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow AskClose VA
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 104. 104. 104. 104. 104. 104. 104. 104. 0
## 2 104. 104. 104. 104. 104. 104. 104. 104. 0
## 3 104. 104. 104. 104. 104. 104. 104. 104. 0
## 4 104. 104. 104. 104. 104. 104. 104. 104. 0
## 5 104. 104. 104. 104. 104. 104. 104. 104. 1
## 6 104. 104. 104. 104. 104. 104. 104. 104. 0
## 7 104. 104. 104. 104. 104. 104. 104. 104. 0
## 8 104. 104. 104. 104. 104. 104. 104. 104. 1
## 9 104. 104. 104. 104. 104. 104. 104. 104. 1
## 10 104. 104. 104. 104. 104. 104. 104. 104. 1
## # ... with 97 more rows, and 1 more variable: VB <dbl>
##
## $imp4
## # A tibble: 96 x 10
## BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow AskClose VA
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 104. 104. 104. 104. 104. 104. 104. 104. 0
## 2 104. 104. 104. 104. 104. 104. 104. 104. 0
## 3 104. 104. 104. 104. 104. 104. 104. 104. 0
## 4 104. 104. 104. 104. 104. 104. 104. 104. 1
## 5 104. 104. 104. 104. 104. 104. 104. 104. 1
## 6 104. 104. 104. 104. 104. 104. 104. 104. 1
## 7 104. 104. 104. 104. 104. 104. 104. 104. 1
## 8 104. 104. 104. 104. 104. 104. 104. 104. 1
## 9 104. 104. 104. 104. 104. 104. 104. 104. 0
## 10 104 104. 104. 104. 104. 104. 104. 104. 0
## # ... with 86 more rows, and 1 more variable: VB <dbl>
##
## $imp5
## # A tibble: 95 x 10
## BidOpen BidHigh BidLow BidClose AskOpen AskHigh AskLow AskClose VA
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 104. 104. 104. 104. 104. 104. 104. 104. 0
## 2 104. 104. 104. 104. 104. 104. 104. 104. 1
## 3 104. 104. 104. 104. 104. 104. 104. 104. 0
## 4 104. 104. 104. 104. 104. 104. 104. 104. 1
## 5 104. 104. 104. 104. 104. 104. 104. 104. 0
## 6 104. 104. 104. 104. 104. 104. 104. 104. 1
## 7 104. 104. 104. 104. 104. 104. 104. 104. 1
## 8 104. 104. 104. 104. 104. 104. 104. 104. 1
## 9 104. 104. 104. 104. 104. 104. 104. 104. 1
## 10 104. 104. 104. 104. 104. 104. 104. 104. 1
## # ... with 85 more rows, and 1 more variable: VB <dbl>
经过测试以上数据,结果发现amelia
也是单变量数据弥补。
注释:单变量弥补的数据将会与之前单变量预测数据一样,就是出现偏差,例如:
以下使用imputeTS::na.seadec()
弥补1%数据缺失值。
# Inject 1% missing values into the price columns only; the first column of
# data_m1 is bound back on afterwards so it never receives an NA.
data_m1_NA <- tbl_df(cbind(
  data_m1[1],
  prodNA(
    dplyr::select(data_m1, BidOpen, BidHigh, BidLow, BidClose,
                  AskOpen, AskHigh, AskLow, AskClose),
    noNA = 0.01)))

# Impute every Ask/Bid column separately with na.seadec(), once per
# algorithm listed in algo.
data_m1_1_impTS <- llply(algo, function(method) {
  prices <- dplyr::select(data_m1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(prices, na.seadec, algorithm = method))
})

# Label each result with its algorithm and stack them into one tibble
# (ldply adds the label as the .id column).
names(data_m1_1_impTS) <- algo
data_m1_1_impTS <- tbl_df(ldply(data_m1_1_impTS))
# Flag imputed rows whose Ask prices break the Low <= Open/Close <= High
# ordering (1 = inconsistent, 0 = consistent).
data_m1_1_impTS <- data_m1_1_impTS %>%
  mutate(
    bias.open  = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
    bias.high  = as.numeric(AskHigh < AskOpen | AskHigh < AskLow |
                              AskHigh < AskClose),
    bias.low   = as.numeric(AskLow > AskOpen | AskLow > AskHigh |
                              AskLow > AskClose),
    bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow))

# List the flagged rows.
dplyr::filter(
  data_m1_1_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 702 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 2 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 3 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 4 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 5 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 6 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 7 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 8 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 9 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 10 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## # ... with 692 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Collapse the imputed data to one row per algorithm (.id).
# The four Ask* columns become the mean squared error between the imputed
# values and the complete data_m1 values.
# NOTE(review): this relies on every .id group having the same rows, in the
# same order, as data_m1 -- confirm.
data_m1_1_impTS %<>%
ddply(.(.id), summarise,
AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
AskLow = mean((AskLow - data_m1$AskLow)^2),
AskClose = mean((AskClose - data_m1$AskClose)^2),
# The two Mean.* lines use the MSE values computed just above (the rendered
# table shows them as averages of the MSE columns), so their position after
# those lines matters.
Mean.HLC = (AskHigh + AskLow + AskClose)/3,
Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
# Share of rows flagged as inconsistent for each price.
bias.open = sum(bias.open)/length(bias.open),
bias.high = sum(bias.high)/length(bias.high),
bias.low = sum(bias.low)/length(bias.low),
bias.close = sum(bias.close)/length(bias.close)) %>% tbl_df
# Render the per-algorithm summary as a styled HTML table.
data_m1_1_impTS %>%
kable(caption = 'MSE') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
scroll_box(width = '100%')#, height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 0.0000006 | 0.0000008 | 0.0000009 | 0.0000013 | 0.0000010 | 0.0000009 | 0.0048958 | 0.0062946 | 0.0048958 | 0.0062946 |
kalman | 0.0000006 | 0.0000008 | 0.0000009 | 0.0000013 | 0.0000010 | 0.0000009 | 0.0050357 | 0.0064345 | 0.0050357 | 0.0064345 |
locf | 0.0000014 | 0.0000031 | 0.0000018 | 0.0000036 | 0.0000029 | 0.0000025 | 0.0037768 | 0.0053154 | 0.0043363 | 0.0058749 |
ma | 0.0000007 | 0.0000014 | 0.0000012 | 0.0000013 | 0.0000013 | 0.0000012 | 0.0079731 | 0.0088124 | 0.0067142 | 0.0086725 |
mean | 0.0005507 | 0.0005905 | 0.0004618 | 0.0006969 | 0.0005831 | 0.0005750 | 0.0194433 | 0.0179046 | 0.0205623 | 0.0194433 |
random | 0.0024076 | 0.0006038 | 0.0023222 | 0.0008296 | 0.0012519 | 0.0015408 | 0.0145475 | 0.0093719 | 0.0195832 | 0.0145475 |
以下使用Amelia::amelia()
弥补1%数据缺失值。
# Run Amelia on the data with missing values and stack its imputed data sets
# into one tibble; ldply labels each set in the .id column.
data_m1_1_amelia <- tbl_df(ldply(amelia(data_m1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2
##
## -- Imputation 2 --
##
## 1 2
##
## -- Imputation 3 --
##
## 1 2
##
## -- Imputation 4 --
##
## 1 2
##
## -- Imputation 5 --
##
## 1 2
# Check whether any missing value is left after the imputation.
data_m1_1_amelia %>% anyNA
## [1] FALSE
# Flag imputed rows whose Ask prices break the Low <= Open/Close <= High
# ordering (1 = inconsistent, 0 = consistent).
data_m1_1_amelia <- data_m1_1_amelia %>%
  mutate(
    bias.open  = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
    bias.high  = as.numeric(AskHigh < AskOpen | AskHigh < AskLow |
                              AskHigh < AskClose),
    bias.low   = as.numeric(AskLow > AskOpen | AskLow > AskHigh |
                              AskLow > AskClose),
    bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow))

# List the flagged rows.
dplyr::filter(
  data_m1_1_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 237 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2016-10-17 00:37:00 104. 104. 104. 104. 104.
## 2 imp1 2016-10-17 02:44:00 104. 104. 104. 104. 104.
## 3 imp1 2016-10-17 02:46:00 104. 104. 104. 104. 104.
## 4 imp1 2016-10-17 07:04:00 104. 104. 104. 104. 104.
## 5 imp1 2016-10-17 12:23:00 104. 104. 104. 104. 104.
## 6 imp1 2016-10-17 16:06:00 104. 104. 104. 104. 104.
## 7 imp1 2016-10-17 17:35:00 104. 104. 104. 104. 104.
## 8 imp1 2016-10-17 19:19:00 104. 104. 104. 104. 104.
## 9 imp1 2016-10-17 23:38:00 104. 104. 104. 104. 104.
## 10 imp1 2016-10-18 00:27:00 104. 104. 104. 104. 104.
## # ... with 227 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Collapse the Amelia results to one row per imputation (.id).
# The four Ask* columns become the mean squared error between the imputed
# values and the complete data_m1 values.
# NOTE(review): this relies on every .id group having the same rows, in the
# same order, as data_m1 -- confirm.
data_m1_1_amelia %<>%
ddply(.(.id), summarise,
AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
AskLow = mean((AskLow - data_m1$AskLow)^2),
AskClose = mean((AskClose - data_m1$AskClose)^2),
# The two Mean.* lines use the MSE values computed just above, so their
# position after those lines matters.
Mean.HLC = (AskHigh + AskLow + AskClose)/3,
Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
# Share of rows flagged as inconsistent for each price.
bias.open = sum(bias.open)/length(bias.open),
bias.high = sum(bias.high)/length(bias.high),
bias.low = sum(bias.low)/length(bias.low),
bias.close = sum(bias.close)/length(bias.close)) %>% tbl_df
# Render the per-imputation summary as a styled HTML table.
data_m1_1_amelia %>%
kable(caption = 'MSE') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
scroll_box(width = '100%')#, height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 0 | 1e-07 | 1e-07 | 3e-07 | 1e-07 | 1e-07 | 0.0039166 | 0.0034970 | 0.0030774 | 0.0029375 |
imp2 | 0 | 1e-07 | 0e+00 | 2e-07 | 1e-07 | 1e-07 | 0.0032172 | 0.0034970 | 0.0029375 | 0.0036369 |
imp3 | 0 | 0e+00 | 0e+00 | 2e-07 | 1e-07 | 1e-07 | 0.0036369 | 0.0033571 | 0.0030774 | 0.0032172 |
imp4 | 0 | 1e-07 | 0e+00 | 2e-07 | 1e-07 | 1e-07 | 0.0037768 | 0.0036369 | 0.0029375 | 0.0032172 |
imp5 | 0 | 0e+00 | 0e+00 | 2e-07 | 1e-07 | 1e-07 | 0.0043363 | 0.0043363 | 0.0034970 | 0.0040565 |
以下使用tidyr::fill()
弥补1%数据缺失值。
# Fill missing prices with the previous observed value first, then fill what
# is still missing (gaps at the very top) with the next observed value.
data_m1_1_tidyr <- data_m1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down') %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up')
# Check whether any missing value is left after filling.
data_m1_1_tidyr %>% anyNA
## [1] FALSE
# Flag filled rows whose Ask prices break the Low <= Open/Close <= High
# ordering (1 = inconsistent, 0 = consistent).
data_m1_1_tidyr <- data_m1_1_tidyr %>%
  mutate(
    bias.open  = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
    bias.high  = as.numeric(AskHigh < AskOpen | AskHigh < AskLow |
                              AskHigh < AskClose),
    bias.low   = as.numeric(AskLow > AskOpen | AskLow > AskHigh |
                              AskLow > AskClose),
    bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow))

# List the flagged rows.
dplyr::filter(
  data_m1_1_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 69 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 02:25:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 02:34:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 03:52:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 04:04:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 04:46:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 08:00:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 10:16:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 12:23:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 14:11:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 16:58:00 104. 104. 104. 104. 104. 104.
## # ... with 59 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Collapse the filled data to a single summary row.
# The four Ask* columns become the mean squared error between the filled
# values and the complete data_m1 values.
data_m1_1_tidyr %<>%
summarise(
AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
AskLow = mean((AskLow - data_m1$AskLow)^2),
AskClose = mean((AskClose - data_m1$AskClose)^2),
# The two Mean.* lines use the MSE values computed just above (the rendered
# table shows them as averages of the MSE columns), so their position after
# those lines matters.
Mean.HLC = (AskHigh + AskLow + AskClose)/3,
Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
# Share of rows flagged as inconsistent for each price.
bias.open = sum(bias.open)/length(bias.open),
bias.high = sum(bias.high)/length(bias.high),
bias.low = sum(bias.low)/length(bias.low),
bias.close = sum(bias.close)/length(bias.close)) %>% tbl_df
# Render the summary as a styled HTML table.
data_m1_1_tidyr %>%
kable(caption = 'MSE') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
scroll_box(width = '100%')#, height = '400px')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
1.4e-06 | 3.1e-06 | 1.8e-06 | 3.6e-06 | 2.9e-06 | 2.5e-06 | 0.0037768 | 0.0053154 | 0.0043363 | 0.0058749 |
以下使用imputeTS::na.seadec()
弥补1%数据缺失值。
# Inject 1% missing values into the price columns only; the first column of
# data_tm1 is bound back on afterwards so it never receives an NA.
data_tm1_NA <- tbl_df(cbind(
  data_tm1[1],
  prodNA(
    dplyr::select(data_tm1, BidOpen, BidHigh, BidLow, BidClose,
                  AskOpen, AskHigh, AskLow, AskClose),
    noNA = 0.01)))

# Impute every Ask/Bid column separately with na.seadec(), once per
# algorithm listed in algo.
data_tm1_1_impTS <- llply(algo, function(method) {
  prices <- dplyr::select(data_tm1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(prices, na.seadec, algorithm = method))
})

# Label each result with its algorithm and stack them into one tibble
# (ldply adds the label as the .id column).
names(data_tm1_1_impTS) <- algo
data_tm1_1_impTS <- tbl_df(ldply(data_tm1_1_impTS))
# Flag imputed rows whose Ask prices break the Low <= Open/Close <= High
# ordering (1 = inconsistent, 0 = consistent).
data_tm1_1_impTS <- data_tm1_1_impTS %>%
  mutate(
    bias.open  = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
    bias.high  = as.numeric(AskHigh < AskOpen | AskHigh < AskLow |
                              AskHigh < AskClose),
    bias.low   = as.numeric(AskLow > AskOpen | AskLow > AskHigh |
                              AskLow > AskClose),
    bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow))

# List the flagged rows.
dplyr::filter(
  data_tm1_1_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 143 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 2 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 3 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 4 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 5 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 6 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 7 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 8 locf 111. 111. 111. 111. 111. 111. 111. 111.
## 9 locf 111. 111. 111. 111. 111. 111. 111. 111.
## 10 locf 111. 111. 111. 111. 111. 111. 111. 111.
## # ... with 133 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Collapse the imputed tick-derived data to one row per algorithm (.id).
# The four Ask* columns become the mean squared error between the imputed
# values and the complete data_tm1 values. data_tm1 is the series that
# data_tm1_NA was built from; the comparison previously used data_m1, a
# different data set, which is why the rendered MSE values were near 48.
# NOTE(review): this relies on every .id group having the same rows, in the
# same order, as data_tm1 -- confirm.
data_tm1_1_impTS %<>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        # The two Mean.* lines use the MSE values computed just above, so
        # their position after those lines matters.
        Mean.HLC = (AskHigh + AskLow + AskClose)/3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
        # Share of rows flagged as inconsistent for each price.
        bias.open = sum(bias.open)/length(bias.open),
        bias.high = sum(bias.high)/length(bias.high),
        bias.low = sum(bias.low)/length(bias.low),
        bias.close = sum(bias.close)/length(bias.close)) %>% tbl_df

# Render the per-algorithm summary as a styled HTML table.
data_tm1_1_impTS %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')#, height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 48.06478 | 48.03953 | 48.09285 | 48.06702 | 48.06647 | 48.06605 | 0.0024570 | 0.0012285 | 0.0030713 | 0.0018428 |
kalman | 48.06484 | 48.03953 | 48.09285 | 48.06702 | 48.06646 | 48.06606 | 0.0030713 | 0.0024570 | 0.0030713 | 0.0024570 |
locf | 48.06471 | 48.03943 | 48.09250 | 48.06706 | 48.06633 | 48.06592 | 0.0079853 | 0.0067568 | 0.0030713 | 0.0024570 |
ma | 48.06507 | 48.03947 | 48.09286 | 48.06706 | 48.06647 | 48.06612 | 0.0055283 | 0.0055283 | 0.0042998 | 0.0055283 |
mean | 48.06836 | 48.03966 | 48.09137 | 48.06870 | 48.06658 | 48.06702 | 0.0165848 | 0.0184275 | 0.0128993 | 0.0159705 |
random | 48.06803 | 48.03820 | 48.10323 | 48.06020 | 48.06721 | 48.06742 | 0.0221130 | 0.0178133 | 0.0233415 | 0.0196560 |
以下使用Amelia::amelia()
弥补1%数据缺失值。
# Run Amelia with its default arguments on the data containing NAs and stack
# the resulting imputed data sets into one tibble; ldply labels each data set
# in the .id column.
data_tm1_1_amelia <- tbl_df(ldply(amelia(data_tm1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2
##
## -- Imputation 2 --
##
## 1 2
##
## -- Imputation 3 --
##
## 1 2
##
## -- Imputation 4 --
##
## 1 2
##
## -- Imputation 5 --
##
## 1 2
# Check whether any missing value is left after the Amelia imputation.
anyNA(data_tm1_1_amelia)
## [1] FALSE
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent):
# open and close must lie within [low, high], high must not be below any other
# price and low must not be above any other price.
data_tm1_1_amelia <- data_tm1_1_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_tm1_1_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 67 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2017-08-07 03:37:00 111. 111. 111. 111. 111.
## 2 imp1 2017-08-07 04:59:00 111. 111. 111. 111. 111.
## 3 imp1 2017-08-07 05:33:00 111. 111. 111. 111. 111.
## 4 imp1 2017-08-07 07:13:00 111. 111. 111. 111. 111.
## 5 imp1 2017-08-07 11:32:00 111. 111. 111. 111. 111.
## 6 imp1 2017-08-07 13:46:00 111. 111. 111. 111. 111.
## 7 imp1 2017-08-07 18:02:00 111. 111. 111. 111. 111.
## 8 imp1 2017-08-07 18:38:00 111. 111. 111. 111. 111.
## 9 imp1 2017-08-07 20:20:00 111. 111. 111. 111. 111.
## 10 imp1 2017-08-07 20:25:00 111. 111. 111. 111. 111.
## # ... with 57 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per-imputation mean squared error (MSE) of the imputed ask prices, plus the
# share of rows carrying each bias flag.
# data_tm1_1_amelia is imputed from data_tm1_NA, so the complete reference
# series is data_tm1. Subtracting data_m1 (prices around 104 versus around 111
# here) produced a constant MSE of roughly 48 for every imputation, which says
# nothing about imputation quality.
# Mean.HLC / Mean.OHLC average the MSE columns computed just above them,
# because summarise evaluates its arguments in order.
data_tm1_1_amelia %<>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        Mean.HLC = (AskHigh + AskLow + AskClose)/3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
        bias.open = mean(bias.open),
        bias.high = mean(bias.high),
        bias.low = mean(bias.low),
        bias.close = mean(bias.close)) %>% tbl_df
# Render the summary as a styled HTML table inside a full-width scroll box.
data_tm1_1_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')#, height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 48.06482 | 48.03931 | 48.09267 | 48.06713 | 48.06637 | 48.06598 | 0.0055283 | 0.0055283 | 0.0030713 | 0.0030713 |
imp2 | 48.06482 | 48.03929 | 48.09266 | 48.06683 | 48.06626 | 48.06590 | 0.0055283 | 0.0042998 | 0.0055283 | 0.0049140 |
imp3 | 48.06477 | 48.03933 | 48.09253 | 48.06716 | 48.06634 | 48.06595 | 0.0036855 | 0.0036855 | 0.0024570 | 0.0024570 |
imp4 | 48.06483 | 48.03946 | 48.09270 | 48.06697 | 48.06638 | 48.06599 | 0.0036855 | 0.0030713 | 0.0042998 | 0.0036855 |
imp5 | 48.06493 | 48.03940 | 48.09263 | 48.06691 | 48.06631 | 48.06597 | 0.0049140 | 0.0042998 | 0.0049140 | 0.0042998 |
以下使用tidyr::fill()
弥补1%数据缺失值。
# Fill every bid/ask column downwards first, then upwards for anything that is
# still missing, and check that no NA remains.
data_tm1_1_tidyr <- data_tm1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down'
  ) %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up'
  )
anyNA(data_tm1_1_tidyr)
## [1] FALSE
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent):
# open and close must lie within [low, high], high must not be below any other
# price and low must not be above any other price.
data_tm1_1_tidyr <- data_tm1_1_tidyr %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_tm1_1_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 16 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 01:05:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 01:26:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 04:59:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 05:14:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 09:31:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 11:53:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 13:06:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 16:49:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 18:02:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 18:07:00 111. 111. 111. 111. 111. 111.
## 11 2017-08-07 18:38:00 111. 111. 111. 111. 111. 111.
## 12 2017-08-07 18:53:00 111. 111. 111. 111. 111. 111.
## 13 2017-08-07 20:47:00 111. 111. 111. 111. 111. 111.
## 14 2017-08-07 23:18:00 111. 111. 111. 111. 111. 111.
## 15 2017-08-08 02:07:00 111. 111. 111. 111. 111. 111.
## 16 2017-08-08 03:28:00 111. 111. 111. 111. 111. 111.
## # ... with 6 more variables: AskLow <dbl>, AskClose <dbl>,
## # bias.open <dbl>, bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Mean squared error (MSE) of the filled ask prices, plus the share of rows
# carrying each bias flag.
# data_tm1_1_tidyr is filled from data_tm1_NA, so the complete reference
# series is data_tm1. Subtracting data_m1 (prices around 104 versus around 111
# here) produced an MSE of roughly 48 that says nothing about fill quality.
# Mean.HLC / Mean.OHLC average the MSE columns computed just above them,
# because summarise evaluates its arguments in order.
data_tm1_1_tidyr %<>%
  summarise(
    AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow = mean((AskLow - data_tm1$AskLow)^2),
    AskClose = mean((AskClose - data_tm1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose)/3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
    bias.open = mean(bias.open),
    bias.high = mean(bias.high),
    bias.low = mean(bias.low),
    bias.close = mean(bias.close)) %>% tbl_df
# Render the summary as a styled HTML table inside a full-width scroll box.
data_tm1_1_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')#, height = '400px')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
48.06471 | 48.03943 | 48.0925 | 48.06706 | 48.06633 | 48.06592 | 0.0079853 | 0.0067568 | 0.0030713 | 0.002457 |
以下使用imputeTS::na.seadec()
弥补10%数据缺失值。
# Inject missing values into the eight bid/ask columns of data_m1
# (noNA = 0.1) and put the first column of data_m1 back in front.
# NOTE(review): no seed is set before prodNA, so the missingness pattern
# presumably changes on every run - confirm whether that is intended.
data_m1_NA <- tbl_df(cbind(
  data_m1[1],
  prodNA(
    dplyr::select(
      data_m1,
      BidOpen, BidHigh, BidLow, BidClose,
      AskOpen, AskHigh, AskLow, AskClose
    ),
    noNA = 0.1
  )
))
# Impute every ask/bid column with na.seadec once per algorithm in algo.
data_m1_10_impTS <- llply(algo, function(method) {
  ask_bid <- dplyr::select(data_m1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(ask_bid, na.seadec, algorithm = method))
})
# Name the results after their algorithm so that ldply records it in .id.
names(data_m1_10_impTS) <- algo
data_m1_10_impTS <- tbl_df(ldply(data_m1_10_impTS))
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent).
data_m1_10_impTS <- data_m1_10_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_m1_10_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 6,105 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 2 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 3 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 4 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 5 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 6 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 7 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 8 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 9 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 10 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## # ... with 6,095 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Per-algorithm MSE of the imputed ask prices against the complete data_m1
# series, followed by the share of rows carrying each bias flag.
# Mean.HLC / Mean.OHLC average the MSE columns defined just above them.
data_m1_10_impTS <- data_m1_10_impTS %>%
  ddply(
    .(.id), summarise,
    AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow = mean((AskLow - data_m1$AskLow)^2),
    AskClose = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = sum(bias.open) / length(bias.open),
    bias.high = sum(bias.high) / length(bias.high),
    bias.low = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df()
# Render the summary as a styled HTML table inside a full-width scroll box.
kable(data_m1_10_impTS, caption = 'MSE 10% 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 0.0000090 | 0.0000071 | 0.0000075 | 0.0000095 | 0.0000080 | 0.0000083 | 0.0495174 | 0.0479787 | 0.0492377 | 0.0496573 |
kalman | 0.0000090 | 0.0000071 | 0.0000075 | 0.0000096 | 0.0000081 | 0.0000083 | 0.0513359 | 0.0500769 | 0.0509162 | 0.0517555 |
locf | 0.0000190 | 0.0000160 | 0.0000167 | 0.0000208 | 0.0000178 | 0.0000181 | 0.0530144 | 0.0507763 | 0.0471395 | 0.0464401 |
ma | 0.0000108 | 0.0000100 | 0.0000107 | 0.0000129 | 0.0000112 | 0.0000111 | 0.0672821 | 0.0636453 | 0.0614072 | 0.0665827 |
mean | 0.0057660 | 0.0063513 | 0.0060217 | 0.0066473 | 0.0063401 | 0.0061966 | 0.1719122 | 0.1493915 | 0.1814240 | 0.1751294 |
random | 0.0077868 | 0.0226350 | 0.0087245 | 0.0291799 | 0.0201798 | 0.0170816 | 0.1092460 | 0.0870052 | 0.1375017 | 0.1214156 |
以下使用Amelia::amelia()
弥补10%数据缺失值。
# Run Amelia with its default arguments on the data containing NAs and stack
# the resulting imputed data sets into one tibble; ldply labels each data set
# in the .id column.
data_m1_10_amelia <- tbl_df(ldply(amelia(data_m1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2
##
## -- Imputation 2 --
##
## 1 2
##
## -- Imputation 3 --
##
## 1 2
##
## -- Imputation 4 --
##
## 1 2
##
## -- Imputation 5 --
##
## 1 2
# Check whether any missing value is left after the Amelia imputation.
anyNA(data_m1_10_amelia)
## [1] FALSE
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent):
# open and close must lie within [low, high], high must not be below any other
# price and low must not be above any other price.
data_m1_10_amelia <- data_m1_10_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_m1_10_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 2,601 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2016-10-17 00:01:00 104. 104. 104. 104. 104.
## 2 imp1 2016-10-17 00:02:00 104. 104. 104. 104. 104.
## 3 imp1 2016-10-17 00:11:00 104. 104. 104. 104. 104.
## 4 imp1 2016-10-17 00:12:00 104. 104. 104. 104. 104.
## 5 imp1 2016-10-17 00:13:00 104. 104. 104. 104. 104.
## 6 imp1 2016-10-17 00:16:00 104. 104. 104. 104. 104.
## 7 imp1 2016-10-17 00:22:00 104. 104. 104. 104. 104.
## 8 imp1 2016-10-17 00:30:00 104. 104. 104. 104. 104.
## 9 imp1 2016-10-17 00:36:00 104. 104. 104. 104. 104.
## 10 imp1 2016-10-17 00:39:00 104. 104. 104. 104. 104.
## # ... with 2,591 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per-imputation MSE of the imputed ask prices against the complete data_m1
# series, followed by the share of rows carrying each bias flag.
# Mean.HLC / Mean.OHLC average the MSE columns defined just above them.
data_m1_10_amelia <- data_m1_10_amelia %>%
  ddply(
    .(.id), summarise,
    AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow = mean((AskLow - data_m1$AskLow)^2),
    AskClose = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = sum(bias.open) / length(bias.open),
    bias.high = sum(bias.high) / length(bias.high),
    bias.low = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df()
# Render the summary as a styled HTML table inside a full-width scroll box.
kable(data_m1_10_amelia, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 1.0e-06 | 1.3e-06 | 1.1e-06 | 1.1e-06 | 1.2e-06 | 1.1e-06 | 0.0430829 | 0.0442020 | 0.0348300 | 0.0386068 |
imp2 | 1.1e-06 | 9.0e-07 | 1.1e-06 | 9.0e-07 | 1.0e-06 | 1.0e-06 | 0.0432228 | 0.0395860 | 0.0372080 | 0.0365086 |
imp3 | 9.0e-07 | 1.2e-06 | 1.1e-06 | 1.0e-06 | 1.1e-06 | 1.1e-06 | 0.0440621 | 0.0432228 | 0.0370681 | 0.0381872 |
imp4 | 1.2e-06 | 1.1e-06 | 1.1e-06 | 1.0e-06 | 1.1e-06 | 1.1e-06 | 0.0402854 | 0.0415443 | 0.0345503 | 0.0379074 |
imp5 | 1.1e-06 | 1.1e-06 | 1.1e-06 | 9.0e-07 | 1.0e-06 | 1.1e-06 | 0.0416842 | 0.0401455 | 0.0348300 | 0.0359491 |
以下使用tidyr::fill()
弥补10%数据缺失值。
# Fill every bid/ask column downwards first, then upwards for anything that is
# still missing, and check that no NA remains.
data_m1_10_tidyr <- data_m1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down'
  ) %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up'
  )
anyNA(data_m1_10_tidyr)
## [1] FALSE
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent):
# open and close must lie within [low, high], high must not be below any other
# price and low must not be above any other price.
data_m1_10_tidyr <- data_m1_10_tidyr %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_m1_10_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 674 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:01:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:31:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 01:20:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 01:30:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 01:37:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 01:40:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 01:49:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 01:57:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 02:19:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 02:25:00 104. 104. 104. 104. 104. 104.
## # ... with 664 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# MSE of the filled ask prices against the complete data_m1 series, followed
# by the share of rows carrying each bias flag.
# Mean.HLC / Mean.OHLC average the MSE columns defined just above them.
data_m1_10_tidyr <- data_m1_10_tidyr %>%
  summarise(
    AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow = mean((AskLow - data_m1$AskLow)^2),
    AskClose = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = sum(bias.open) / length(bias.open),
    bias.high = sum(bias.high) / length(bias.high),
    bias.low = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df()
# Render the summary as a styled HTML table inside a full-width scroll box.
kable(data_m1_10_tidyr, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
1.9e-05 | 1.6e-05 | 1.67e-05 | 2.08e-05 | 1.78e-05 | 1.81e-05 | 0.0530144 | 0.0507763 | 0.0471395 | 0.0464401 |
以下使用imputeTS::na.seadec()
弥补10%数据缺失值。
# Inject missing values into the eight bid/ask columns of data_tm1
# (noNA = 0.1) and put the first column of data_tm1 back in front.
# NOTE(review): no seed is set before prodNA, so the missingness pattern
# presumably changes on every run - confirm whether that is intended.
data_tm1_NA <- tbl_df(cbind(
  data_tm1[1],
  prodNA(
    dplyr::select(
      data_tm1,
      BidOpen, BidHigh, BidLow, BidClose,
      AskOpen, AskHigh, AskLow, AskClose
    ),
    noNA = 0.1
  )
))
# Impute every ask/bid column with na.seadec once per algorithm in algo.
data_tm1_10_impTS <- llply(algo, function(method) {
  ask_bid <- dplyr::select(data_tm1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(ask_bid, na.seadec, algorithm = method))
})
# Name the results after their algorithm so that ldply records it in .id.
names(data_tm1_10_impTS) <- algo
data_tm1_10_impTS <- tbl_df(ldply(data_tm1_10_impTS))
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent).
data_tm1_10_impTS <- data_tm1_10_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_tm1_10_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 1,411 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 2 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 3 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 4 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 5 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 6 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 7 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 8 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 9 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 10 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## # ... with 1,401 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Per-algorithm mean squared error (MSE) of the imputed ask prices, plus the
# share of rows carrying each bias flag.
# data_tm1_10_impTS is imputed from data_tm1_NA, which is built from data_tm1,
# so the complete reference series is data_tm1. Subtracting data_m1 (prices
# around 104 versus around 111 here) produced an MSE of roughly 48 for every
# algorithm, which says nothing about imputation quality.
# Mean.HLC / Mean.OHLC average the MSE columns computed just above them,
# because summarise evaluates its arguments in order.
data_tm1_10_impTS %<>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        Mean.HLC = (AskHigh + AskLow + AskClose)/3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
        bias.open = mean(bias.open),
        bias.high = mean(bias.high),
        bias.low = mean(bias.low),
        bias.close = mean(bias.close)) %>% tbl_df
# Render the summary as a styled HTML table inside a full-width scroll box.
data_tm1_10_impTS %>%
  kable(caption = 'MSE 10% 缺失值') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')#, height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 48.06382 | 48.03829 | 48.09319 | 48.06827 | 48.06658 | 48.06589 | 0.0546683 | 0.0485258 | 0.0454545 | 0.0423833 |
kalman | 48.06387 | 48.03828 | 48.09319 | 48.06821 | 48.06656 | 48.06589 | 0.0528256 | 0.0491400 | 0.0448403 | 0.0448403 |
locf | 48.06388 | 48.04010 | 48.09277 | 48.06879 | 48.06722 | 48.06639 | 0.0589681 | 0.0485258 | 0.0558968 | 0.0472973 |
ma | 48.06389 | 48.03796 | 48.09338 | 48.06803 | 48.06646 | 48.06581 | 0.0626536 | 0.0540541 | 0.0540541 | 0.0552826 |
mean | 48.07145 | 48.03286 | 48.10848 | 48.06614 | 48.06916 | 48.06973 | 0.1726044 | 0.1603194 | 0.1572482 | 0.1664619 |
random | 47.91482 | 48.20938 | 48.19080 | 48.04397 | 48.14805 | 48.08974 | 0.1848894 | 0.1025799 | 0.2291155 | 0.1633907 |
以下使用Amelia::amelia()
弥补10%数据缺失值。
# Run Amelia with its default arguments on the data containing NAs and stack
# the resulting imputed data sets into one tibble; ldply labels each data set
# in the .id column.
data_tm1_10_amelia <- tbl_df(ldply(amelia(data_tm1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4
##
## -- Imputation 2 --
##
## 1 2 3
##
## -- Imputation 3 --
##
## 1 2 3
##
## -- Imputation 4 --
##
## 1 2 3
##
## -- Imputation 5 --
##
## 1 2 3
# Check whether any missing value is left after the Amelia imputation.
anyNA(data_tm1_10_amelia)
## [1] FALSE
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent):
# open and close must lie within [low, high], high must not be below any other
# price and low must not be above any other price.
data_tm1_10_amelia <- data_tm1_10_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_tm1_10_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 627 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2017-08-07 00:44:00 111. 111. 111. 111. 111.
## 2 imp1 2017-08-07 00:45:00 111. 111. 111. 111. 111.
## 3 imp1 2017-08-07 00:57:00 111. 111. 111. 111. 111.
## 4 imp1 2017-08-07 01:07:00 111. 111. 111. 111. 111.
## 5 imp1 2017-08-07 01:18:00 111. 111. 111. 111. 111.
## 6 imp1 2017-08-07 01:21:00 111. 111. 111. 111. 111.
## 7 imp1 2017-08-07 01:33:00 111. 111. 111. 111. 111.
## 8 imp1 2017-08-07 01:36:00 111. 111. 111. 111. 111.
## 9 imp1 2017-08-07 02:07:00 111. 111. 111. 111. 111.
## 10 imp1 2017-08-07 02:09:00 111. 111. 111. 111. 111.
## # ... with 617 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per-imputation mean squared error (MSE) of the imputed ask prices, plus the
# share of rows carrying each bias flag.
# data_tm1_10_amelia is imputed from data_tm1_NA, so the complete reference
# series is data_tm1. Subtracting data_m1 (prices around 104 versus around 111
# here) produced an MSE of roughly 48 for every imputation, which says nothing
# about imputation quality.
# Mean.HLC / Mean.OHLC average the MSE columns computed just above them,
# because summarise evaluates its arguments in order.
data_tm1_10_amelia %<>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        Mean.HLC = (AskHigh + AskLow + AskClose)/3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
        bias.open = mean(bias.open),
        bias.high = mean(bias.high),
        bias.low = mean(bias.low),
        bias.close = mean(bias.close)) %>% tbl_df
# Render the summary as a styled HTML table inside a full-width scroll box.
data_tm1_10_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')#, height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 48.06460 | 48.03961 | 48.09248 | 48.06715 | 48.06642 | 48.06596 | 0.0479115 | 0.0337838 | 0.0411548 | 0.0307125 |
imp2 | 48.06486 | 48.03910 | 48.09271 | 48.06702 | 48.06627 | 48.06592 | 0.0423833 | 0.0393120 | 0.0337838 | 0.0337838 |
imp3 | 48.06492 | 48.03909 | 48.09336 | 48.06684 | 48.06643 | 48.06605 | 0.0472973 | 0.0411548 | 0.0509828 | 0.0485258 |
imp4 | 48.06482 | 48.03913 | 48.09300 | 48.06658 | 48.06624 | 48.06588 | 0.0466830 | 0.0386978 | 0.0417690 | 0.0350123 |
imp5 | 48.06506 | 48.03883 | 48.09322 | 48.06684 | 48.06630 | 48.06599 | 0.0565111 | 0.0466830 | 0.0448403 | 0.0399263 |
以下使用tidyr::fill()
弥补10%数据缺失值。
# Fill every bid/ask column downwards first, then upwards for anything that is
# still missing, and check that no NA remains.
data_tm1_10_tidyr <- data_tm1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down'
  ) %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up'
  )
anyNA(data_tm1_10_tidyr)
## [1] FALSE
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent):
# open and close must lie within [low, high], high must not be below any other
# price and low must not be above any other price.
data_tm1_10_tidyr <- data_tm1_10_tidyr %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_tm1_10_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 164 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:05:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:17:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:44:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:45:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 01:03:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 01:04:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 01:11:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 01:21:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 02:09:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 02:32:00 111. 111. 111. 111. 111. 111.
## # ... with 154 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Mean squared error (MSE) of the filled ask prices, plus the share of rows
# carrying each bias flag.
# data_tm1_10_tidyr is filled from data_tm1_NA, so the complete reference
# series is data_tm1. Subtracting data_m1 (prices around 104 versus around 111
# here) produced an MSE of roughly 48 that says nothing about fill quality.
# Mean.HLC / Mean.OHLC average the MSE columns computed just above them,
# because summarise evaluates its arguments in order.
data_tm1_10_tidyr %<>%
  summarise(
    AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow = mean((AskLow - data_tm1$AskLow)^2),
    AskClose = mean((AskClose - data_tm1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose)/3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
    bias.open = mean(bias.open),
    bias.high = mean(bias.high),
    bias.low = mean(bias.low),
    bias.close = mean(bias.close)) %>% tbl_df
# Render the summary as a styled HTML table inside a full-width scroll box.
data_tm1_10_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')#, height = '400px')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
48.06388 | 48.0401 | 48.09277 | 48.06879 | 48.06722 | 48.06639 | 0.0589681 | 0.0485258 | 0.0558968 | 0.0472973 |
以下使用imputeTS::na.seadec()
弥补20%数据缺失值。
# Inject missing values into the eight bid/ask columns of data_m1
# (noNA = 0.2) and put the first column of data_m1 back in front.
# NOTE(review): no seed is set before prodNA, so the missingness pattern
# presumably changes on every run - confirm whether that is intended.
data_m1_NA <- tbl_df(cbind(
  data_m1[1],
  prodNA(
    dplyr::select(
      data_m1,
      BidOpen, BidHigh, BidLow, BidClose,
      AskOpen, AskHigh, AskLow, AskClose
    ),
    noNA = 0.2
  )
))
# Impute every ask/bid column with na.seadec once per algorithm in algo.
data_m1_20_impTS <- llply(algo, function(method) {
  ask_bid <- dplyr::select(data_m1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(ask_bid, na.seadec, algorithm = method))
})
# Name the results after their algorithm so that ldply records it in .id.
names(data_m1_20_impTS) <- algo
data_m1_20_impTS <- tbl_df(ldply(data_m1_20_impTS))
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent).
data_m1_20_impTS <- data_m1_20_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_m1_20_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 11,975 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 2 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 3 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 4 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 5 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 6 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 7 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 8 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 9 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 10 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## # ... with 11,965 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Per-algorithm MSE of the imputed ask prices against the complete data_m1
# series, followed by the share of rows carrying each bias flag.
# Mean.HLC / Mean.OHLC average the MSE columns defined just above them.
data_m1_20_impTS <- data_m1_20_impTS %>%
  ddply(
    .(.id), summarise,
    AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow = mean((AskLow - data_m1$AskLow)^2),
    AskClose = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = sum(bias.open) / length(bias.open),
    bias.high = sum(bias.high) / length(bias.high),
    bias.low = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df()
# Render the summary as a styled HTML table inside a full-width scroll box.
kable(data_m1_20_impTS, caption = 'MSE 20% 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 0.0000229 | 0.0000146 | 0.0000154 | 0.0000201 | 0.0000167 | 0.0000182 | 0.1054693 | 0.0988950 | 0.0949783 | 0.0948384 |
kalman | 0.0000229 | 0.0000146 | 0.0000154 | 0.0000202 | 0.0000167 | 0.0000183 | 0.1088264 | 0.1022521 | 0.0976360 | 0.0986152 |
locf | 0.0000526 | 0.0000327 | 0.0000319 | 0.0000438 | 0.0000362 | 0.0000403 | 0.1105050 | 0.0969366 | 0.0981956 | 0.0916212 |
ma | 0.0000275 | 0.0000192 | 0.0000198 | 0.0000245 | 0.0000212 | 0.0000228 | 0.1292488 | 0.1209959 | 0.1112044 | 0.1222549 |
mean | 0.0123887 | 0.0117783 | 0.0116303 | 0.0126437 | 0.0120174 | 0.0121103 | 0.3066163 | 0.2652119 | 0.2957057 | 0.3014408 |
random | 0.0308892 | 0.0318163 | 0.0327874 | 0.0353737 | 0.0333258 | 0.0327167 | 0.3551546 | 0.3561337 | 0.3119317 | 0.3477409 |
以下使用Amelia::amelia()
弥补20%数据缺失值。
# Run Amelia with its default arguments on the data containing NAs and stack
# the resulting imputed data sets into one tibble; ldply labels each data set
# in the .id column.
data_m1_20_amelia <- tbl_df(ldply(amelia(data_m1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3
##
## -- Imputation 2 --
##
## 1 2 3
##
## -- Imputation 3 --
##
## 1 2 3
##
## -- Imputation 4 --
##
## 1 2 3
##
## -- Imputation 5 --
##
## 1 2 3
# Check whether any missing value is left after the Amelia imputation.
anyNA(data_m1_20_amelia)
## [1] FALSE
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent):
# open and close must lie within [low, high], high must not be below any other
# price and low must not be above any other price.
data_m1_20_amelia <- data_m1_20_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_m1_20_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 5,306 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2016-10-17 00:00:00 104. 104. 104. 104. 104.
## 2 imp1 2016-10-17 00:10:00 104. 104. 104. 104. 104.
## 3 imp1 2016-10-17 00:13:00 104. 104. 104. 104. 104.
## 4 imp1 2016-10-17 00:16:00 104. 104. 104. 104. 104.
## 5 imp1 2016-10-17 00:17:00 104. 104. 104. 104. 104.
## 6 imp1 2016-10-17 00:27:00 104. 104. 104. 104. 104.
## 7 imp1 2016-10-17 00:33:00 104. 104. 104. 104. 104.
## 8 imp1 2016-10-17 00:38:00 104. 104. 104. 104. 104.
## 9 imp1 2016-10-17 00:39:00 104. 104. 104. 104. 104.
## 10 imp1 2016-10-17 00:41:00 104. 104. 104. 104. 104.
## # ... with 5,296 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per-imputation MSE of the imputed ask prices against the complete data_m1
# series, followed by the share of rows carrying each bias flag.
# Mean.HLC / Mean.OHLC average the MSE columns defined just above them.
data_m1_20_amelia <- data_m1_20_amelia %>%
  ddply(
    .(.id), summarise,
    AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow = mean((AskLow - data_m1$AskLow)^2),
    AskClose = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = sum(bias.open) / length(bias.open),
    bias.high = sum(bias.high) / length(bias.high),
    bias.low = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df()
# Render the summary as a styled HTML table inside a full-width scroll box.
kable(data_m1_20_amelia, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 5.1e-06 | 3.9e-06 | 4.4e-06 | 3.9e-06 | 4.1e-06 | 4.3e-06 | 0.0969366 | 0.0854665 | 0.0762344 | 0.0698000 |
imp2 | 5.0e-06 | 3.9e-06 | 3.6e-06 | 3.7e-06 | 3.7e-06 | 4.0e-06 | 0.0952581 | 0.0830885 | 0.0769338 | 0.0706393 |
imp3 | 4.6e-06 | 3.5e-06 | 4.5e-06 | 3.8e-06 | 3.9e-06 | 4.1e-06 | 0.0951182 | 0.0811302 | 0.0819695 | 0.0742761 |
imp4 | 4.4e-06 | 3.9e-06 | 4.0e-06 | 3.5e-06 | 3.8e-06 | 3.9e-06 | 0.0952581 | 0.0853266 | 0.0769338 | 0.0730172 |
imp5 | 4.2e-06 | 3.8e-06 | 3.7e-06 | 3.4e-06 | 3.6e-06 | 3.8e-06 | 0.0959575 | 0.0844873 | 0.0770737 | 0.0706393 |
以下使用tidyr::fill()
弥补20%数据缺失值。
# Fill every bid/ask column downwards first, then upwards for anything that is
# still missing, and check that no NA remains.
data_m1_20_tidyr <- data_m1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down'
  ) %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up'
  )
anyNA(data_m1_20_tidyr)
## [1] FALSE
# Flag ask prices that break OHLC ordering (1 = inconsistent, 0 = consistent):
# open and close must lie within [low, high], high must not be below any other
# price and low must not be above any other price.
data_m1_20_tidyr <- data_m1_20_tidyr %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Print the rows with at least one flag raised.
dplyr::filter(
  data_m1_20_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 1,314 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:00:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:03:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 00:17:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 00:19:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 00:20:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 00:21:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 00:31:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 00:33:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 00:42:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 00:55:00 104. 104. 104. 104. 104. 104.
## # ... with 1,304 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# MSE of the filled ask prices against the complete data_m1 series, followed
# by the share of rows carrying each bias flag.
# Mean.HLC / Mean.OHLC average the MSE columns defined just above them.
data_m1_20_tidyr <- data_m1_20_tidyr %>%
  summarise(
    AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow = mean((AskLow - data_m1$AskLow)^2),
    AskClose = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = sum(bias.open) / length(bias.open),
    bias.high = sum(bias.high) / length(bias.high),
    bias.low = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df()
# Render the summary as a styled HTML table inside a full-width scroll box.
kable(data_m1_20_tidyr, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
5.26e-05 | 3.27e-05 | 3.19e-05 | 4.38e-05 | 3.62e-05 | 4.03e-05 | 0.110505 | 0.0969366 | 0.0981956 | 0.0916212 |
以下使用 `imputeTS::na.seadec()` 弥补20%数据缺失值。
# Inject 20% missing values into the bid/ask price columns of data_tm1 and
# put the first column of data_tm1 back in front.
data_tm1_NA <- tbl_df(cbind(
  data_tm1[1],
  prodNA(
    dplyr::select(
      data_tm1,
      BidOpen, BidHigh, BidLow, BidClose,
      AskOpen, AskHigh, AskLow, AskClose
    ),
    noNA = 0.2
  )
))
# Impute every price column with na.seadec(), once per algorithm in `algo`.
data_tm1_20_impTS <- llply(algo, function(method) {
  prices <- dplyr::select(data_tm1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(prices, na.seadec, algorithm = method))
})
names(data_tm1_20_impTS) <- algo
# Stack the per-algorithm results; the list names end up in the .id column.
data_tm1_20_impTS <- tbl_df(ldply(data_tm1_20_impTS))
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent).
data_tm1_20_impTS <- data_tm1_20_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_tm1_20_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 2,538 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 2 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 3 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 4 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 5 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 6 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 7 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 8 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 9 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 10 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## # ... with 2,528 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Per-algorithm (.id) summary: MSE of every imputed ask field, the HLC / OHLC
# averages of those MSEs, and the share of rows carrying each bias flag.
# The reference series is data_tm1, the data set the NAs were injected into.
# The earlier data_m1 reference compared against a different data set, which
# is why every MSE came out near 48 instead of near zero.
data_tm1_20_impTS %<>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        # The four names above now hold MSEs, so these average the MSEs.
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = sum(bias.open) / length(bias.open),
        bias.high = sum(bias.high) / length(bias.high),
        bias.low = sum(bias.low) / length(bias.low),
        bias.close = sum(bias.close) / length(bias.close)) %>%
  tbl_df
# Show the summary as a styled table inside a full-width scroll box.
data_tm1_20_impTS %>%
  kable(caption = 'MSE 20% 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%') # , height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 48.06431 | 48.03889 | 48.09359 | 48.06921 | 48.06723 | 48.06650 | 0.0872236 | 0.0896806 | 0.0755528 | 0.0853808 |
kalman | 48.06438 | 48.03886 | 48.09359 | 48.06910 | 48.06718 | 48.06648 | 0.0927518 | 0.0945946 | 0.0816953 | 0.0933661 |
locf | 48.06368 | 48.03972 | 48.09354 | 48.06721 | 48.06683 | 48.06604 | 0.1068796 | 0.0976658 | 0.0970516 | 0.0952088 |
ma | 48.06454 | 48.03813 | 48.09355 | 48.06860 | 48.06676 | 48.06620 | 0.1087224 | 0.1068796 | 0.0939803 | 0.1081081 |
mean | 48.07973 | 48.02832 | 48.08681 | 48.08731 | 48.06748 | 48.07054 | 0.2782555 | 0.2721130 | 0.2395577 | 0.2800983 |
random | 48.45310 | 48.01260 | 48.05931 | 47.86129 | 47.97773 | 48.09657 | 0.3366093 | 0.3218673 | 0.3126536 | 0.3359951 |
以下使用 `Amelia::amelia()` 弥补20%数据缺失值。
# Run amelia() on the NA-injected data and stack the imputed data sets it
# returns into one tibble; the .id column identifies the imputation.
data_tm1_20_amelia <- tbl_df(ldply(amelia(data_tm1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12
##
## -- Imputation 3 --
##
## 1 2 3 4 5
##
## -- Imputation 4 --
##
## 1 2 3 4
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7
# Report whether any missing value remains after imputation.
anyNA(data_tm1_20_amelia)
## [1] FALSE
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent), one flag per price field.
data_tm1_20_amelia <- data_tm1_20_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_tm1_20_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 1,293 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2017-08-07 00:24:00 111. 111. 111. 111. 111.
## 2 imp1 2017-08-07 00:41:00 111. 111. 111. 111. 111.
## 3 imp1 2017-08-07 00:44:00 111. 111. 111. 111. 111.
## 4 imp1 2017-08-07 00:50:00 111. 111. 111. 111. 111.
## 5 imp1 2017-08-07 00:51:00 111. 111. 111. 111. 111.
## 6 imp1 2017-08-07 01:01:00 111. 111. 111. 111. 111.
## 7 imp1 2017-08-07 01:19:00 111. 111. 111. 111. 111.
## 8 imp1 2017-08-07 01:20:00 111. 111. 111. 111. 111.
## 9 imp1 2017-08-07 01:22:00 111. 111. 111. 111. 111.
## 10 imp1 2017-08-07 01:26:00 111. 111. 111. 111. 111.
## # ... with 1,283 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per-imputation (.id) summary: MSE of every imputed ask field, the HLC / OHLC
# averages of those MSEs, and the share of rows carrying each bias flag.
# The reference series is data_tm1, the data set the NAs were injected into.
# The earlier data_m1 reference compared against a different data set, which
# is why every MSE came out near 48 instead of near zero.
data_tm1_20_amelia %<>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        # The four names above now hold MSEs, so these average the MSEs.
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = sum(bias.open) / length(bias.open),
        bias.high = sum(bias.high) / length(bias.high),
        bias.low = sum(bias.low) / length(bias.low),
        bias.close = sum(bias.close) / length(bias.close)) %>%
  tbl_df
# Show the summary as a styled table inside a full-width scroll box.
data_tm1_20_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%') # , height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 48.06460 | 48.03932 | 48.09376 | 48.06750 | 48.06686 | 48.06629 | 0.1056511 | 0.0970516 | 0.0970516 | 0.0982801 |
imp2 | 48.06493 | 48.03925 | 48.09323 | 48.06796 | 48.06682 | 48.06635 | 0.0921376 | 0.0884521 | 0.0823096 | 0.0902948 |
imp3 | 48.06402 | 48.04021 | 48.09313 | 48.06711 | 48.06682 | 48.06612 | 0.0902948 | 0.0743243 | 0.0939803 | 0.0859951 |
imp4 | 48.06378 | 48.03896 | 48.09348 | 48.06752 | 48.06665 | 48.06593 | 0.0921376 | 0.0804668 | 0.0878378 | 0.0853808 |
imp5 | 48.06468 | 48.03944 | 48.09354 | 48.06753 | 48.06684 | 48.06630 | 0.1007371 | 0.0786241 | 0.0853808 | 0.0687961 |
以下使用 `tidyr::fill()` 弥补20%数据缺失值。
# First pass: replace each NA with the previous observation.
data_tm1_20_tidyr <- fill(
  data_tm1_NA,
  BidOpen, BidHigh, BidLow, BidClose,
  AskOpen, AskHigh, AskLow, AskClose,
  .direction = 'down'
)
# Second pass: whatever is still missing takes the next observation.
data_tm1_20_tidyr <- fill(
  data_tm1_20_tidyr,
  BidOpen, BidHigh, BidLow, BidClose,
  AskOpen, AskHigh, AskLow, AskClose,
  .direction = 'up'
)
# Report whether any missing value survived both passes.
anyNA(data_tm1_20_tidyr)
## [1] FALSE
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent), one flag per price field.
data_tm1_20_tidyr <- data_tm1_20_tidyr %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_tm1_20_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 291 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:02:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:03:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:04:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:05:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 00:17:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 00:22:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 00:24:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 00:41:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 00:44:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 01:00:00 111. 111. 111. 111. 111. 111.
## # ... with 281 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# One-row summary of the filled data: MSE of every ask field, the HLC / OHLC
# averages of those MSEs, and the share of rows carrying each bias flag.
# The reference series is data_tm1, the data set the NAs were injected into.
# The earlier data_m1 reference compared against a different data set, which
# is why every MSE came out near 48 instead of near zero.
data_tm1_20_tidyr %<>%
  summarise(
    AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow = mean((AskLow - data_tm1$AskLow)^2),
    AskClose = mean((AskClose - data_tm1$AskClose)^2),
    # The four names above now hold MSEs, so these average the MSEs.
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = sum(bias.open) / length(bias.open),
    bias.high = sum(bias.high) / length(bias.high),
    bias.low = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)) %>%
  tbl_df
# Show the summary as a styled table inside a full-width scroll box.
data_tm1_20_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%') # , height = '400px')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
48.06368 | 48.03972 | 48.09354 | 48.06721 | 48.06683 | 48.06604 | 0.1068796 | 0.0976658 | 0.0970516 | 0.0952088 |
以下使用 `imputeTS::na.seadec()` 弥补30%数据缺失值。
# Inject 30% missing values into the bid/ask price columns of data_m1 and
# put the first column of data_m1 back in front.
data_m1_NA <- tbl_df(cbind(
  data_m1[1],
  prodNA(
    dplyr::select(
      data_m1,
      BidOpen, BidHigh, BidLow, BidClose,
      AskOpen, AskHigh, AskLow, AskClose
    ),
    noNA = 0.3
  )
))
# Impute every price column with na.seadec(), once per algorithm in `algo`.
data_m1_30_impTS <- llply(algo, function(method) {
  prices <- dplyr::select(data_m1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(prices, na.seadec, algorithm = method))
})
names(data_m1_30_impTS) <- algo
# Stack the per-algorithm results; the list names end up in the .id column.
data_m1_30_impTS <- tbl_df(ldply(data_m1_30_impTS))
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent).
data_m1_30_impTS <- data_m1_30_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_m1_30_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 16,045 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 2 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 3 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 4 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 5 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 6 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 7 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 8 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 9 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 10 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## # ... with 16,035 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Per-algorithm (.id) summary: MSE of every imputed ask field against
# data_m1, the HLC / OHLC averages of those MSEs, and the share of rows
# carrying each bias flag.
data_m1_30_impTS <- tbl_df(ddply(
  data_m1_30_impTS, .(.id), summarise,
  AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
  AskLow = mean((AskLow - data_m1$AskLow)^2),
  AskClose = mean((AskClose - data_m1$AskClose)^2),
  # The four names above now hold MSEs, so these are averages of the MSEs.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)
))
# Show the summary as a styled table inside a full-width scroll box.
scroll_box(
  kable_styling(
    kable(data_m1_30_impTS, caption = 'MSE 30% 缺失值'),
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ),
  width = '100%'
) # , height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 0.0000374 | 0.0000247 | 0.0000282 | 0.0000342 | 0.0000290 | 0.0000311 | 0.1573647 | 0.1435166 | 0.1454749 | 0.1502308 |
kalman | 0.0000373 | 0.0000247 | 0.0000282 | 0.0000344 | 0.0000291 | 0.0000312 | 0.1603021 | 0.1458945 | 0.1481326 | 0.1537278 |
locf | 0.0000830 | 0.0000646 | 0.0000672 | 0.0000785 | 0.0000701 | 0.0000733 | 0.1658973 | 0.1559659 | 0.1465939 | 0.1577843 |
ma | 0.0000425 | 0.0000321 | 0.0000351 | 0.0000407 | 0.0000359 | 0.0000376 | 0.1805847 | 0.1612813 | 0.1653378 | 0.1772276 |
mean | 0.0190881 | 0.0174648 | 0.0182534 | 0.0190274 | 0.0182485 | 0.0184584 | 0.4018744 | 0.3536159 | 0.3729193 | 0.4076095 |
random | 0.0891399 | 0.0503076 | 0.0991078 | 0.0534894 | 0.0676349 | 0.0730112 | 0.4879004 | 0.5048258 | 0.3189257 | 0.3983774 |
以下使用 `Amelia::amelia()` 弥补30%数据缺失值。
# Run amelia() on the NA-injected data and stack the imputed data sets it
# returns into one tibble; the .id column identifies the imputation.
data_m1_30_amelia <- tbl_df(ldply(amelia(data_m1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4
##
## -- Imputation 2 --
##
## 1 2 3 4
##
## -- Imputation 3 --
##
## 1 2 3 4
##
## -- Imputation 4 --
##
## 1 2 3 4
##
## -- Imputation 5 --
##
## 1 2 3 4
# Report whether any missing value remains after imputation.
anyNA(data_m1_30_amelia)
## [1] FALSE
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent), one flag per price field.
data_m1_30_amelia <- data_m1_30_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_m1_30_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 7,961 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2016-10-17 00:00:00 104. 104. 104. 104. 104.
## 2 imp1 2016-10-17 00:03:00 104. 104. 104. 104. 104.
## 3 imp1 2016-10-17 00:07:00 104. 104. 104. 104. 104.
## 4 imp1 2016-10-17 00:08:00 104. 104. 104. 104. 104.
## 5 imp1 2016-10-17 00:09:00 104. 104. 104. 104. 104.
## 6 imp1 2016-10-17 00:10:00 104. 104. 104. 104. 104.
## 7 imp1 2016-10-17 00:11:00 104. 104. 104. 104. 104.
## 8 imp1 2016-10-17 00:16:00 104. 104. 104. 104. 104.
## 9 imp1 2016-10-17 00:17:00 104. 104. 104. 104. 104.
## 10 imp1 2016-10-17 00:23:00 104. 104. 104. 104. 104.
## # ... with 7,951 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per-imputation (.id) summary: MSE of every imputed ask field against
# data_m1, the HLC / OHLC averages of those MSEs, and the share of rows
# carrying each bias flag.
data_m1_30_amelia <- tbl_df(ddply(
  data_m1_30_amelia, .(.id), summarise,
  AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
  AskLow = mean((AskLow - data_m1$AskLow)^2),
  AskClose = mean((AskClose - data_m1$AskClose)^2),
  # The four names above now hold MSEs, so these are averages of the MSEs.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)
))
# Show the summary as a styled table inside a full-width scroll box.
scroll_box(
  kable_styling(
    kable(data_m1_30_amelia, caption = 'MSE'),
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ),
  width = '100%'
) # , height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 1.12e-05 | 8.6e-06 | 9.7e-06 | 8.2e-06 | 8.8e-06 | 9.4e-06 | 0.1486921 | 0.1268709 | 0.1289691 | 0.1187579 |
imp2 | 1.06e-05 | 7.7e-06 | 9.2e-06 | 8.0e-06 | 8.3e-06 | 8.9e-06 | 0.1419779 | 0.1198769 | 0.1221150 | 0.1113442 |
imp3 | 1.05e-05 | 7.9e-06 | 9.4e-06 | 8.0e-06 | 8.4e-06 | 9.0e-06 | 0.1437963 | 0.1226745 | 0.1253322 | 0.1134424 |
imp4 | 1.12e-05 | 8.9e-06 | 9.2e-06 | 8.1e-06 | 8.8e-06 | 9.4e-06 | 0.1421178 | 0.1278500 | 0.1281298 | 0.1249126 |
imp5 | 1.17e-05 | 8.3e-06 | 9.6e-06 | 8.2e-06 | 8.7e-06 | 9.4e-06 | 0.1407190 | 0.1202965 | 0.1254721 | 0.1179186 |
以下使用 `tidyr::fill()` 弥补30%数据缺失值。
# First pass: replace each NA with the previous observation.
data_m1_30_tidyr <- fill(
  data_m1_NA,
  BidOpen, BidHigh, BidLow, BidClose,
  AskOpen, AskHigh, AskLow, AskClose,
  .direction = 'down'
)
# Second pass: whatever is still missing takes the next observation.
data_m1_30_tidyr <- fill(
  data_m1_30_tidyr,
  BidOpen, BidHigh, BidLow, BidClose,
  AskOpen, AskHigh, AskLow, AskClose,
  .direction = 'up'
)
# Report whether any missing value survived both passes.
anyNA(data_m1_30_tidyr)
## [1] FALSE
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent), one flag per price field.
data_m1_30_tidyr <- data_m1_30_tidyr %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_m1_30_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 1,929 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:05:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:09:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 00:10:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 00:11:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 00:19:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 00:23:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 00:38:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 00:39:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 00:42:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 00:44:00 104. 104. 104. 104. 104. 104.
## # ... with 1,919 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Collapse the filled data to a single row: MSE of every ask field against
# data_m1, the HLC / OHLC averages of those MSEs, and the share of rows
# carrying each bias flag.
data_m1_30_tidyr <- tbl_df(summarise(
  data_m1_30_tidyr,
  AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
  AskLow = mean((AskLow - data_m1$AskLow)^2),
  AskClose = mean((AskClose - data_m1$AskClose)^2),
  # The four names above now hold MSEs, so these are averages of the MSEs.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)
))
# Show the summary as a styled table inside a full-width scroll box.
scroll_box(
  kable_styling(
    kable(data_m1_30_tidyr, caption = 'MSE'),
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ),
  width = '100%'
) # , height = '400px')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
8.3e-05 | 6.46e-05 | 6.72e-05 | 7.85e-05 | 7.01e-05 | 7.33e-05 | 0.1658973 | 0.1559659 | 0.1465939 | 0.1577843 |
以下使用 `imputeTS::na.seadec()` 弥补30%数据缺失值。
# Inject 30% missing values into the bid/ask price columns of data_tm1 and
# put the first column of data_tm1 back in front.
data_tm1_NA <- tbl_df(cbind(
  data_tm1[1],
  prodNA(
    dplyr::select(
      data_tm1,
      BidOpen, BidHigh, BidLow, BidClose,
      AskOpen, AskHigh, AskLow, AskClose
    ),
    noNA = 0.3
  )
))
# Impute every price column with na.seadec(), once per algorithm in `algo`.
data_tm1_30_impTS <- llply(algo, function(method) {
  prices <- dplyr::select(data_tm1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(prices, na.seadec, algorithm = method))
})
names(data_tm1_30_impTS) <- algo
# Stack the per-algorithm results; the list names end up in the .id column.
data_tm1_30_impTS <- tbl_df(ldply(data_tm1_30_impTS))
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent).
data_tm1_30_impTS <- data_tm1_30_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_tm1_30_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 3,569 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 2 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 3 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 4 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 5 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 6 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 7 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 8 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 9 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 10 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## # ... with 3,559 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Per-algorithm (.id) summary: MSE of every imputed ask field, the HLC / OHLC
# averages of those MSEs, and the share of rows carrying each bias flag.
# The reference series is data_tm1, the data set the NAs were injected into.
# The earlier data_m1 reference compared against a different data set, which
# is why every MSE came out near 48 instead of near zero.
data_tm1_30_impTS %<>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        # The four names above now hold MSEs, so these average the MSEs.
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = sum(bias.open) / length(bias.open),
        bias.high = sum(bias.high) / length(bias.high),
        bias.low = sum(bias.low) / length(bias.low),
        bias.close = sum(bias.close) / length(bias.close)) %>%
  tbl_df
# Show the summary as a styled table inside a full-width scroll box.
data_tm1_30_impTS %>%
  kable(caption = 'MSE 30% 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%') # , height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 48.06474 | 48.03922 | 48.09377 | 48.06792 | 48.06697 | 48.06641 | 0.1461916 | 0.1351351 | 0.1332924 | 0.1461916 |
kalman | 48.06447 | 48.03942 | 48.09377 | 48.06791 | 48.06703 | 48.06639 | 0.1529484 | 0.1418919 | 0.1363636 | 0.1541769 |
locf | 48.06791 | 48.04236 | 48.09580 | 48.06738 | 48.06851 | 48.06836 | 0.1621622 | 0.1461916 | 0.1541769 | 0.1658477 |
ma | 48.06320 | 48.04018 | 48.09408 | 48.06792 | 48.06739 | 48.06635 | 0.1566339 | 0.1345209 | 0.1547912 | 0.1689189 |
mean | 48.07771 | 48.04385 | 48.10906 | 48.09428 | 48.08240 | 48.08122 | 0.3642506 | 0.3507371 | 0.3224816 | 0.3869779 |
random | 48.25972 | 48.06207 | 48.33286 | 47.77778 | 48.05757 | 48.10811 | 0.5165848 | 0.4864865 | 0.5558968 | 0.5423833 |
以下使用 `Amelia::amelia()` 弥补30%数据缺失值。
# Run amelia() on the NA-injected data and stack the imputed data sets it
# returns into one tibble; the .id column identifies the imputation.
data_tm1_30_amelia <- tbl_df(ldply(amelia(data_tm1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8
# Report whether any missing value remains after imputation.
anyNA(data_tm1_30_amelia)
## [1] FALSE
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent), one flag per price field.
data_tm1_30_amelia <- data_tm1_30_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_tm1_30_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 1,704 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2017-08-07 00:04:00 111. 111. 111. 111. 111.
## 2 imp1 2017-08-07 00:07:00 111. 111. 111. 111. 111.
## 3 imp1 2017-08-07 00:17:00 111. 111. 111. 111. 111.
## 4 imp1 2017-08-07 00:18:00 111. 111. 111. 111. 111.
## 5 imp1 2017-08-07 00:22:00 111. 111. 111. 111. 111.
## 6 imp1 2017-08-07 00:50:00 111. 111. 111. 111. 111.
## 7 imp1 2017-08-07 00:52:00 111. 111. 111. 111. 111.
## 8 imp1 2017-08-07 00:59:00 111. 111. 111. 111. 111.
## 9 imp1 2017-08-07 01:00:00 111. 111. 111. 111. 111.
## 10 imp1 2017-08-07 01:12:00 111. 111. 111. 111. 111.
## # ... with 1,694 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per-imputation (.id) summary: MSE of every imputed ask field, the HLC / OHLC
# averages of those MSEs, and the share of rows carrying each bias flag.
# The reference series is data_tm1, the data set the NAs were injected into.
# The earlier data_m1 reference compared against a different data set, which
# is why every MSE came out near 48 instead of near zero.
data_tm1_30_amelia %<>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        # The four names above now hold MSEs, so these average the MSEs.
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = sum(bias.open) / length(bias.open),
        bias.high = sum(bias.high) / length(bias.high),
        bias.low = sum(bias.low) / length(bias.low),
        bias.close = sum(bias.close) / length(bias.close)) %>%
  tbl_df
# Show the summary as a styled table inside a full-width scroll box.
data_tm1_30_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%') # , height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 48.06165 | 48.03872 | 48.09386 | 48.06788 | 48.06682 | 48.06553 | 0.1425061 | 0.1314496 | 0.1173219 | 0.1216216 |
imp2 | 48.06313 | 48.03819 | 48.09304 | 48.06734 | 48.06619 | 48.06543 | 0.1412776 | 0.1332924 | 0.1025799 | 0.1068796 |
imp3 | 48.06348 | 48.03940 | 48.09352 | 48.06649 | 48.06647 | 48.06572 | 0.1308354 | 0.1210074 | 0.1081081 | 0.1093366 |
imp4 | 48.06310 | 48.04006 | 48.09321 | 48.06804 | 48.06711 | 48.06610 | 0.1351351 | 0.1283784 | 0.1062654 | 0.1142506 |
imp5 | 48.06414 | 48.03950 | 48.09303 | 48.06767 | 48.06673 | 48.06608 | 0.1400491 | 0.1265356 | 0.1056511 | 0.1068796 |
以下使用 `tidyr::fill()` 弥补30%数据缺失值。
# First pass: replace each NA with the previous observation.
data_tm1_30_tidyr <- fill(
  data_tm1_NA,
  BidOpen, BidHigh, BidLow, BidClose,
  AskOpen, AskHigh, AskLow, AskClose,
  .direction = 'down'
)
# Second pass: whatever is still missing takes the next observation.
data_tm1_30_tidyr <- fill(
  data_tm1_30_tidyr,
  BidOpen, BidHigh, BidLow, BidClose,
  AskOpen, AskHigh, AskLow, AskClose,
  .direction = 'up'
)
# Report whether any missing value survived both passes.
anyNA(data_tm1_30_tidyr)
## [1] FALSE
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent), one flag per price field.
data_tm1_30_tidyr <- data_tm1_30_tidyr %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_tm1_30_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 431 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:02:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:03:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:04:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:05:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 00:06:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 00:07:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 00:18:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 00:43:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 00:44:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 00:45:00 111. 111. 111. 111. 111. 111.
## # ... with 421 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# One-row summary of the filled data: MSE of every ask field, the HLC / OHLC
# averages of those MSEs, and the share of rows carrying each bias flag.
# The reference series is data_tm1, the data set the NAs were injected into.
# The earlier data_m1 reference compared against a different data set, which
# is why every MSE came out near 48 instead of near zero.
data_tm1_30_tidyr %<>%
  summarise(
    AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow = mean((AskLow - data_tm1$AskLow)^2),
    AskClose = mean((AskClose - data_tm1$AskClose)^2),
    # The four names above now hold MSEs, so these average the MSEs.
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = sum(bias.open) / length(bias.open),
    bias.high = sum(bias.high) / length(bias.high),
    bias.low = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)) %>%
  tbl_df
# Show the summary as a styled table inside a full-width scroll box.
data_tm1_30_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%') # , height = '400px')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
48.06791 | 48.04236 | 48.0958 | 48.06738 | 48.06851 | 48.06836 | 0.1621622 | 0.1461916 | 0.1541769 | 0.1658477 |
以下使用 `imputeTS::na.seadec()` 弥补50%数据缺失值。
# Inject 50% missing values into the bid/ask price columns of data_m1 and
# put the first column of data_m1 back in front.
data_m1_NA <- tbl_df(cbind(
  data_m1[1],
  prodNA(
    dplyr::select(
      data_m1,
      BidOpen, BidHigh, BidLow, BidClose,
      AskOpen, AskHigh, AskLow, AskClose
    ),
    noNA = 0.5
  )
))
# Impute every price column with na.seadec(), once per algorithm in `algo`.
data_m1_50_impTS <- llply(algo, function(method) {
  prices <- dplyr::select(data_m1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(prices, na.seadec, algorithm = method))
})
names(data_m1_50_impTS) <- algo
# Stack the per-algorithm results; the list names end up in the .id column.
data_m1_50_impTS <- tbl_df(ldply(data_m1_50_impTS))
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent).
data_m1_50_impTS <- data_m1_50_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_m1_50_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 22,342 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 2 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 3 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 4 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 5 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 6 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 7 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 8 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 9 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 10 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## # ... with 22,332 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Per-algorithm (.id) summary: MSE of every imputed ask field against
# data_m1, the HLC / OHLC averages of those MSEs, and the share of rows
# carrying each bias flag.
data_m1_50_impTS <- tbl_df(ddply(
  data_m1_50_impTS, .(.id), summarise,
  AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
  AskLow = mean((AskLow - data_m1$AskLow)^2),
  AskClose = mean((AskClose - data_m1$AskClose)^2),
  # The four names above now hold MSEs, so these are averages of the MSEs.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)
))
# Show the summary as a styled table inside a full-width scroll box.
scroll_box(
  kable_styling(
    kable(data_m1_50_impTS, caption = 'MSE 50% 缺失值'),
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ),
  width = '100%'
) # , height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 0.0000704 | 0.0000582 | 0.0000780 | 0.0000750 | 0.0000704 | 0.0000704 | 0.2460484 | 0.2282837 | 0.2087005 | 0.2559799 |
kalman | 0.0000704 | 0.0000582 | 0.0000783 | 0.0000750 | 0.0000705 | 0.0000705 | 0.2530424 | 0.2322003 | 0.2149951 | 0.2632536 |
locf | 0.0001652 | 0.0001420 | 0.0001480 | 0.0001916 | 0.0001606 | 0.0001617 | 0.2764023 | 0.2480067 | 0.2397538 | 0.2771017 |
ma | 0.0000804 | 0.0000680 | 0.0001001 | 0.0000872 | 0.0000851 | 0.0000839 | 0.2808784 | 0.2552805 | 0.2345783 | 0.2899706 |
mean | 0.0306066 | 0.0306853 | 0.0305917 | 0.0302918 | 0.0305230 | 0.0305439 | 0.4684571 | 0.4066303 | 0.4129249 | 0.4671982 |
random | 0.0401562 | 0.0407120 | 0.0546502 | 0.0518731 | 0.0490784 | 0.0468479 | 0.8181564 | 0.7682193 | 0.7954959 | 0.7911596 |
以下使用 `Amelia::amelia()` 弥补50%数据缺失值。
# Run amelia() on the NA-injected data and stack the imputed data sets it
# returns into one tibble; the .id column identifies the imputation.
data_m1_50_amelia <- tbl_df(ldply(amelia(data_m1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8
# Report whether any missing value remains after imputation.
anyNA(data_m1_50_amelia)
## [1] FALSE
# Flag rows whose ask prices break the open/high/low/close ordering
# (1 = inconsistent, 0 = consistent), one flag per price field.
data_m1_50_amelia <- data_m1_50_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
    ),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
    ),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# List every row that has at least one flag set.
dplyr::filter(
  data_m1_50_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 12,149 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2016-10-17 00:03:00 104. 104. 104. 104. 104.
## 2 imp1 2016-10-17 00:04:00 104. 104. 104. 104. 104.
## 3 imp1 2016-10-17 00:05:00 104. 104. 104. 104. 104.
## 4 imp1 2016-10-17 00:07:00 104. 104. 104. 104. 104.
## 5 imp1 2016-10-17 00:08:00 104. 104. 104. 104. 104.
## 6 imp1 2016-10-17 00:09:00 104. 104. 104. 104. 104.
## 7 imp1 2016-10-17 00:10:00 104. 104. 104. 104. 104.
## 8 imp1 2016-10-17 00:11:00 104. 104. 104. 104. 104.
## 9 imp1 2016-10-17 00:12:00 104. 104. 104. 104. 104.
## 10 imp1 2016-10-17 00:13:00 104. 104. 104. 104. 104.
## # ... with 12,139 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per imputation (`.id`): mean squared error of each ask price against the
# complete series `data_m1`, then the averages of those errors, then the
# share of rows flagged as inconsistent.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above
# them, because later summary expressions see the earlier results.
data_m1_50_amelia <- data_m1_50_amelia %>%
  ddply(
    .(.id), summarise,
    AskOpen    = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow     = mean((AskLow - data_m1$AskLow)^2),
    AskClose   = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_m1_50_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 0.0002917 | 0.0002841 | 0.0002886 | 0.0002930 | 0.0002886 | 0.0002894 | 0.2471674 | 0.2261855 | 0.1993286 | 0.1994685 |
imp2 | 0.0004936 | 0.0004858 | 0.0004941 | 0.0004908 | 0.0004902 | 0.0004911 | 0.2330396 | 0.2022661 | 0.2007274 | 0.1881382 |
imp3 | 0.0005796 | 0.0005732 | 0.0005761 | 0.0005780 | 0.0005758 | 0.0005767 | 0.2238075 | 0.2071618 | 0.1945727 | 0.1948524 |
imp4 | 0.0004111 | 0.0004062 | 0.0004048 | 0.0004095 | 0.0004068 | 0.0004079 | 0.2572388 | 0.2260456 | 0.2201707 | 0.2075815 |
imp5 | 0.0005000 | 0.0004896 | 0.0005036 | 0.0004981 | 0.0004971 | 0.0004978 | 0.2285634 | 0.2045041 | 0.2000280 | 0.1926144 |
以下使用 `tidyr::fill()` 弥补 `data_m1` 中50%的数据缺失值。
# Carry the last observed value forward, then fill whatever is still missing
# at the start of the series from the next observed value.
data_m1_50_tidyr <- data_m1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down'
  ) %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up'
  )

# Check whether any missing value is left.
anyNA(data_m1_50_tidyr)
## [1] FALSE
# Flag rows whose ask prices break the OHLC ordering
# (1 = inconsistent, 0 = consistent).
data_m1_50_tidyr <- data_m1_50_tidyr %>%
  mutate(
    bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high  = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low   = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )

# Show every row carrying at least one flag.
data_m1_50_tidyr %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 2,974 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:04:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:06:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 00:07:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 00:08:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 00:09:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 00:10:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 00:11:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 00:12:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 00:13:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 00:20:00 104. 104. 104. 104. 104. 104.
## # ... with 2,964 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Mean squared error of each filled ask price against the complete series
# `data_m1`, the averages of those errors, and the share of flagged rows.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above.
data_m1_50_tidyr <- data_m1_50_tidyr %>%
  summarise(
    AskOpen    = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow     = mean((AskLow - data_m1$AskLow)^2),
    AskClose   = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_m1_50_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
0.0001652 | 0.000142 | 0.000148 | 0.0001916 | 0.0001606 | 0.0001617 | 0.2764023 | 0.2480067 | 0.2397538 | 0.2771017 |
以下使用 `imputeTS::na.seadec()` 弥补 `data_tm1` 中50%的数据缺失值。
# Replace 50% of the bid/ask OHLC cells of `data_tm1` with NA, then put the
# first column of `data_tm1` back in front.
data_tm1_NA <- data_tm1 %>%
  dplyr::select(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose
  ) %>%
  prodNA(noNA = 0.5) %>%
  cbind(data_tm1[1], .) %>%
  tbl_df

# Impute every price column once per algorithm in `algo` and stack the
# results; `.id` records the algorithm name.
# NOTE(review): the columns presumably reach na.seadec as plain numeric
# vectors - confirm it has the seasonal frequency it needs to decompose.
data_tm1_50_impTS <- llply(algo, function(method) {
  data_tm1_NA %>%
    dplyr::select(starts_with('Ask'), starts_with('Bid')) %>%
    map(na.seadec, algorithm = method) %>%
    as.tibble
}) %>%
  setNames(algo) %>%
  ldply %>%
  tbl_df

# Flag rows whose ask prices break the OHLC ordering
# (1 = inconsistent, 0 = consistent).
data_tm1_50_impTS <- data_tm1_50_impTS %>%
  mutate(
    bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high  = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low   = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )

# Show every row carrying at least one flag.
data_tm1_50_impTS %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 4,649 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 2 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 3 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 4 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 5 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 6 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 7 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 8 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 9 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 10 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## # ... with 4,639 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Per algorithm (`.id`): mean squared error of each imputed ask price against
# the complete series, the averages of those errors, and the share of rows
# flagged as inconsistent.
# The reference has to be `data_tm1`, the series `data_tm1_NA` was built
# from. Measuring against `data_m1` (another sample at another price level)
# reports the level gap between the two samples instead of the imputation
# error.
# NOTE: the table rendered below this chunk was produced with the old
# `data_m1` reference and must be regenerated.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above.
data_tm1_50_impTS <- data_tm1_50_impTS %>%
  ddply(
    .(.id), summarise,
    AskOpen    = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow     = mean((AskLow - data_tm1$AskLow)^2),
    AskClose   = mean((AskClose - data_tm1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_tm1_50_impTS %>%
  kable(caption = 'MSE 50% 缺失值') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 48.06646 | 48.03692 | 48.09347 | 48.06623 | 48.06554 | 48.06577 | 0.2223587 | 0.2094595 | 0.1799754 | 0.2260442 |
kalman | 48.06676 | 48.03676 | 48.09347 | 48.06627 | 48.06550 | 48.06581 | 0.2340295 | 0.2168305 | 0.1848894 | 0.2315725 |
locf | 48.06433 | 48.03798 | 48.09349 | 48.06476 | 48.06541 | 48.06514 | 0.2807125 | 0.2444717 | 0.2242015 | 0.2585995 |
ma | 48.06724 | 48.03628 | 48.09353 | 48.06685 | 48.06555 | 48.06597 | 0.2469287 | 0.2266585 | 0.1996314 | 0.2432432 |
mean | 48.10011 | 48.05789 | 48.10226 | 48.06800 | 48.07605 | 48.08206 | 0.4484029 | 0.3900491 | 0.3832924 | 0.4318182 |
random | 48.61875 | 48.18569 | 47.83183 | 47.39597 | 47.80450 | 48.00806 | 0.6044226 | 0.5761671 | 0.5909091 | 0.6173219 |
以下使用 `Amelia::amelia()` 弥补 `data_tm1` 中50%的数据缺失值。
# Run Amelia on the series with artificial gaps and stack the imputed data
# sets it returns into one tibble; `.id` identifies the imputation.
data_tm1_50_amelia <- tbl_df(ldply(amelia(data_tm1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45
# Check whether any missing value survived the imputation.
anyNA(data_tm1_50_amelia)
## [1] FALSE
# Flag rows whose ask prices break the OHLC ordering
# (1 = inconsistent, 0 = consistent).
data_tm1_50_amelia <- data_tm1_50_amelia %>%
  mutate(
    bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high  = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low   = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )

# Show every row carrying at least one flag.
data_tm1_50_amelia %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 2,800 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2017-08-07 00:03:00 111. 111. 111. 111. 111.
## 2 imp1 2017-08-07 00:07:00 111. 111. 111. 111. 111.
## 3 imp1 2017-08-07 00:08:00 111. 111. 111. 111. 111.
## 4 imp1 2017-08-07 00:18:00 111. 111. 111. 111. 111.
## 5 imp1 2017-08-07 00:24:00 111. 111. 111. 111. 111.
## 6 imp1 2017-08-07 00:40:00 111. 111. 111. 111. 111.
## 7 imp1 2017-08-07 00:42:00 111. 111. 111. 111. 111.
## 8 imp1 2017-08-07 00:43:00 111. 111. 111. 111. 111.
## 9 imp1 2017-08-07 00:44:00 111. 111. 111. 111. 111.
## 10 imp1 2017-08-07 00:45:00 111. 111. 111. 111. 111.
## # ... with 2,790 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per imputation (`.id`): mean squared error of each imputed ask price
# against the complete series, the averages of those errors, and the share
# of rows flagged as inconsistent.
# The reference has to be `data_tm1`, the series `data_tm1_NA` was built
# from. Measuring against `data_m1` (another sample at another price level)
# reports the level gap between the two samples instead of the imputation
# error.
# NOTE: the table rendered below this chunk was produced with the old
# `data_m1` reference and must be regenerated.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above.
data_tm1_50_amelia <- data_tm1_50_amelia %>%
  ddply(
    .(.id), summarise,
    AskOpen    = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow     = mean((AskLow - data_tm1$AskLow)^2),
    AskClose   = mean((AskClose - data_tm1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_tm1_50_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 48.06908 | 48.04628 | 48.10127 | 48.07136 | 48.07297 | 48.07200 | 0.2192875 | 0.1934889 | 0.2039312 | 0.2027027 |
imp2 | 48.06819 | 48.04509 | 48.09811 | 48.07183 | 48.07168 | 48.07080 | 0.2340295 | 0.2235872 | 0.2094595 | 0.2260442 |
imp3 | 48.06598 | 48.04386 | 48.09902 | 48.06832 | 48.07040 | 48.06930 | 0.2156020 | 0.1971744 | 0.2082310 | 0.2100737 |
imp4 | 48.06758 | 48.04450 | 48.09766 | 48.07323 | 48.07180 | 48.07074 | 0.2143735 | 0.2039312 | 0.1947174 | 0.2063882 |
imp5 | 48.06570 | 48.04092 | 48.09609 | 48.06943 | 48.06881 | 48.06803 | 0.2506143 | 0.2346437 | 0.2223587 | 0.2297297 |
以下使用 `tidyr::fill()` 弥补 `data_tm1` 中50%的数据缺失值。
# Carry the last observed value forward, then fill whatever is still missing
# at the start of the series from the next observed value.
data_tm1_50_tidyr <- data_tm1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down'
  ) %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up'
  )

# Check whether any missing value is left.
anyNA(data_tm1_50_tidyr)
## [1] FALSE
# Flag rows whose ask prices break the OHLC ordering
# (1 = inconsistent, 0 = consistent).
data_tm1_50_tidyr <- data_tm1_50_tidyr %>%
  mutate(
    bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high  = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low   = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )

# Show every row carrying at least one flag.
data_tm1_50_tidyr %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 656 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:02:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:03:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:04:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:05:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 00:07:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 00:17:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 00:18:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 00:43:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 00:44:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 00:45:00 111. 111. 111. 111. 111. 111.
## # ... with 646 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Mean squared error of each filled ask price against the complete series,
# the averages of those errors, and the share of rows flagged as
# inconsistent.
# The reference has to be `data_tm1`, the series `data_tm1_NA` was built
# from. Measuring against `data_m1` (another sample at another price level)
# reports the level gap between the two samples instead of the imputation
# error.
# NOTE: the table rendered below this chunk was produced with the old
# `data_m1` reference and must be regenerated.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above.
data_tm1_50_tidyr <- data_tm1_50_tidyr %>%
  summarise(
    AskOpen    = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow     = mean((AskLow - data_tm1$AskLow)^2),
    AskClose   = mean((AskClose - data_tm1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_tm1_50_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
48.06433 | 48.03798 | 48.09349 | 48.06476 | 48.06541 | 48.06514 | 0.2807125 | 0.2444717 | 0.2242015 | 0.2585995 |
以下使用 `imputeTS::na.seadec()` 弥补 `data_m1` 中65%的数据缺失值。
# Replace 65% of the bid/ask OHLC cells of `data_m1` with NA, then put the
# first column of `data_m1` back in front.
data_m1_NA <- data_m1 %>%
  dplyr::select(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose
  ) %>%
  prodNA(noNA = 0.65) %>%
  cbind(data_m1[1], .) %>%
  tbl_df

# Impute every price column once per algorithm in `algo` and stack the
# results; `.id` records the algorithm name.
# NOTE(review): the columns presumably reach na.seadec as plain numeric
# vectors - confirm it has the seasonal frequency it needs to decompose.
data_m1_65_impTS <- llply(algo, function(method) {
  data_m1_NA %>%
    dplyr::select(starts_with('Ask'), starts_with('Bid')) %>%
    map(na.seadec, algorithm = method) %>%
    as.tibble
}) %>%
  setNames(algo) %>%
  ldply %>%
  tbl_df

# Flag rows whose ask prices break the OHLC ordering
# (1 = inconsistent, 0 = consistent).
data_m1_65_impTS <- data_m1_65_impTS %>%
  mutate(
    bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high  = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low   = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )

# Show every row carrying at least one flag.
data_m1_65_impTS %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 24,251 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 2 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 3 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 4 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 5 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 6 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 7 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 8 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 9 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 10 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## # ... with 24,241 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Per algorithm (`.id`): mean squared error of each imputed ask price against
# the complete series `data_m1`, the averages of those errors, and the share
# of rows flagged as inconsistent.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above.
data_m1_65_impTS <- data_m1_65_impTS %>%
  ddply(
    .(.id), summarise,
    AskOpen    = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow     = mean((AskLow - data_m1$AskLow)^2),
    AskClose   = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_m1_65_impTS %>%
  kable(caption = 'MSE 65% 缺失值') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 0.0001279 | 0.0001238 | 0.0001136 | 0.0001206 | 0.0001193 | 0.0001215 | 0.3662051 | 0.3241013 | 0.2831165 | 0.3394880 |
kalman | 0.0001282 | 0.0001238 | 0.0001133 | 0.0001202 | 0.0001191 | 0.0001214 | 0.3674640 | 0.3241013 | 0.2845153 | 0.3415862 |
locf | 0.0002955 | 0.0003304 | 0.0002383 | 0.0002986 | 0.0002891 | 0.0002907 | 0.3817317 | 0.3404672 | 0.3154287 | 0.3776752 |
ma | 0.0001512 | 0.0001488 | 0.0001313 | 0.0001449 | 0.0001417 | 0.0001441 | 0.3813121 | 0.3329137 | 0.3022800 | 0.3621486 |
mean | 0.0400602 | 0.0406286 | 0.0391629 | 0.0395314 | 0.0397743 | 0.0398458 | 0.4402014 | 0.3983774 | 0.3761365 | 0.4364247 |
random | 0.0533171 | 0.0806651 | 0.1361035 | 0.1684688 | 0.1284125 | 0.1096386 | 0.2796195 | 0.2432508 | 0.6848510 | 0.6998182 |
以下使用 `Amelia::amelia()` 弥补 `data_m1` 中65%的数据缺失值。
# Run Amelia on the series with artificial gaps and stack the imputed data
# sets it returns into one tibble; `.id` identifies the imputation.
data_m1_65_amelia <- tbl_df(ldply(amelia(data_m1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
# Check whether any missing value survived the imputation.
anyNA(data_m1_65_amelia)
## [1] FALSE
# Flag rows whose ask prices break the OHLC ordering
# (1 = inconsistent, 0 = consistent).
data_m1_65_amelia <- data_m1_65_amelia %>%
  mutate(
    bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high  = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low   = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )

# Show every row carrying at least one flag.
data_m1_65_amelia %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 17,317 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2016-10-17 00:03:00 104. 104. 104. 104. 104.
## 2 imp1 2016-10-17 00:04:00 104. 104. 104. 104. 104.
## 3 imp1 2016-10-17 00:07:00 104. 104. 104. 104. 104.
## 4 imp1 2016-10-17 00:08:00 104. 104. 104. 104. 104.
## 5 imp1 2016-10-17 00:12:00 104. 104. 104. 104. 104.
## 6 imp1 2016-10-17 00:15:00 104. 104. 104. 104. 104.
## 7 imp1 2016-10-17 00:16:00 104. 104. 104. 104. 104.
## 8 imp1 2016-10-17 00:17:00 104. 104. 104. 104. 104.
## 9 imp1 2016-10-17 00:18:00 104. 104. 104. 104. 104.
## 10 imp1 2016-10-17 00:20:00 104. 104. 104. 104. 104.
## # ... with 17,307 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per imputation (`.id`): mean squared error of each imputed ask price
# against the complete series `data_m1`, the averages of those errors, and
# the share of rows flagged as inconsistent.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above.
data_m1_65_amelia <- data_m1_65_amelia %>%
  ddply(
    .(.id), summarise,
    AskOpen    = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow     = mean((AskLow - data_m1$AskLow)^2),
    AskClose   = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_m1_65_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 0.0037038 | 0.0036950 | 0.0037028 | 0.0037144 | 0.0037040 | 0.0037040 | 0.3534760 | 0.3313750 | 0.3049378 | 0.3376696 |
imp2 | 0.0037461 | 0.0037553 | 0.0037236 | 0.0037351 | 0.0037380 | 0.0037400 | 0.3432648 | 0.3056372 | 0.2940271 | 0.3113722 |
imp3 | 0.0040437 | 0.0040587 | 0.0040400 | 0.0040502 | 0.0040496 | 0.0040481 | 0.3513778 | 0.3303959 | 0.2906700 | 0.3392083 |
imp4 | 0.0040114 | 0.0039923 | 0.0040178 | 0.0040065 | 0.0040055 | 0.0040070 | 0.3424255 | 0.3155686 | 0.2971045 | 0.3151490 |
imp5 | 0.0041922 | 0.0041956 | 0.0041963 | 0.0042075 | 0.0041998 | 0.0041979 | 0.3347321 | 0.3033991 | 0.2861939 | 0.3043782 |
以下使用 `tidyr::fill()` 弥补 `data_m1` 中65%的数据缺失值。
# Carry the last observed value forward, then fill whatever is still missing
# at the start of the series from the next observed value.
data_m1_65_tidyr <- data_m1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down'
  ) %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up'
  )

# Check whether any missing value is left.
anyNA(data_m1_65_tidyr)
## [1] FALSE
# Flag rows whose ask prices break the OHLC ordering
# (1 = inconsistent, 0 = consistent).
data_m1_65_tidyr <- data_m1_65_tidyr %>%
  mutate(
    bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high  = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low   = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )

# Show every row carrying at least one flag.
data_m1_65_tidyr %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 3,737 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:07:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:08:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 00:09:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 00:10:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 00:11:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 00:12:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 00:13:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 00:14:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 00:15:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 00:18:00 104. 104. 104. 104. 104. 104.
## # ... with 3,727 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Mean squared error of each filled ask price against the complete series
# `data_m1`, the averages of those errors, and the share of flagged rows.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above.
data_m1_65_tidyr <- data_m1_65_tidyr %>%
  summarise(
    AskOpen    = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow     = mean((AskLow - data_m1$AskLow)^2),
    AskClose   = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_m1_65_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
0.0002955 | 0.0003304 | 0.0002383 | 0.0002986 | 0.0002891 | 0.0002907 | 0.3817317 | 0.3404672 | 0.3154287 | 0.3776752 |
以下使用 `imputeTS::na.seadec()` 弥补 `data_tm1` 中65%的数据缺失值。
# Replace 65% of the bid/ask OHLC cells of `data_tm1` with NA, then put the
# first column of `data_tm1` back in front.
data_tm1_NA <- data_tm1 %>%
  dplyr::select(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose
  ) %>%
  prodNA(noNA = 0.65) %>%
  cbind(data_tm1[1], .) %>%
  tbl_df

# Impute every price column once per algorithm in `algo` and stack the
# results; `.id` records the algorithm name.
# NOTE(review): the columns presumably reach na.seadec as plain numeric
# vectors - confirm it has the seasonal frequency it needs to decompose.
data_tm1_65_impTS <- llply(algo, function(method) {
  data_tm1_NA %>%
    dplyr::select(starts_with('Ask'), starts_with('Bid')) %>%
    map(na.seadec, algorithm = method) %>%
    as.tibble
}) %>%
  setNames(algo) %>%
  ldply %>%
  tbl_df

# Flag rows whose ask prices break the OHLC ordering
# (1 = inconsistent, 0 = consistent).
data_tm1_65_impTS <- data_tm1_65_impTS %>%
  mutate(
    bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high  = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low   = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )

# Show every row carrying at least one flag.
data_tm1_65_impTS %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 5,425 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 2 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 3 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 4 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 5 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 6 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 7 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 8 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 9 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 10 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## # ... with 5,415 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Per algorithm (`.id`): mean squared error of each imputed ask price against
# the complete series, the averages of those errors, and the share of rows
# flagged as inconsistent.
# The reference has to be `data_tm1`, the series `data_tm1_NA` was built
# from. Measuring against `data_m1` (another sample at another price level)
# reports the level gap between the two samples instead of the imputation
# error.
# NOTE: the table rendered below this chunk was produced with the old
# `data_m1` reference and must be regenerated.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above.
data_tm1_65_impTS <- data_tm1_65_impTS %>%
  ddply(
    .(.id), summarise,
    AskOpen    = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow     = mean((AskLow - data_tm1$AskLow)^2),
    AskClose   = mean((AskClose - data_tm1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_tm1_65_impTS %>%
  kable(caption = 'MSE 65% 缺失值') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 48.07036 | 48.04468 | 48.08991 | 48.07116 | 48.06858 | 48.06903 | 0.3095823 | 0.2659705 | 0.2457002 | 0.2948403 |
kalman | 48.07038 | 48.04468 | 48.08991 | 48.07061 | 48.06840 | 48.06890 | 0.3114251 | 0.2684275 | 0.2450860 | 0.2911548 |
locf | 48.06478 | 48.04620 | 48.08847 | 48.06434 | 48.06634 | 48.06595 | 0.3544226 | 0.3138821 | 0.2942260 | 0.3445946 |
ma | 48.07108 | 48.04394 | 48.09041 | 48.07027 | 48.06821 | 48.06892 | 0.3353808 | 0.2764128 | 0.2647420 | 0.3101966 |
mean | 48.06675 | 48.02981 | 48.12859 | 48.11165 | 48.09002 | 48.08420 | 0.4367322 | 0.3814496 | 0.3783784 | 0.4176904 |
random | 47.50568 | 48.49940 | 49.01626 | 47.85631 | 48.45733 | 48.21942 | 0.8832924 | 0.6977887 | 0.9195332 | 0.8298526 |
以下使用 `Amelia::amelia()` 弥补 `data_tm1` 中65%的数据缺失值。
# Run Amelia on the series with artificial gaps and stack the imputed data
# sets it returns into one tibble; `.id` identifies the imputation.
data_tm1_65_amelia <- tbl_df(ldply(amelia(data_tm1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48
# Check whether any missing value survived the imputation.
anyNA(data_tm1_65_amelia)
## [1] FALSE
# Flag rows whose ask prices break the OHLC ordering
# (1 = inconsistent, 0 = consistent).
data_tm1_65_amelia <- data_tm1_65_amelia %>%
  mutate(
    bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high  = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low   = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )

# Show every row carrying at least one flag.
data_tm1_65_amelia %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 3,558 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2017-08-07 00:03:00 111. 111. 111. 111. 111.
## 2 imp1 2017-08-07 00:04:00 111. 111. 111. 111. 111.
## 3 imp1 2017-08-07 00:05:00 111. 111. 111. 111. 111.
## 4 imp1 2017-08-07 00:06:00 111. 111. 111. 111. 111.
## 5 imp1 2017-08-07 00:09:00 111. 111. 111. 111. 111.
## 6 imp1 2017-08-07 00:18:00 111. 111. 111. 111. 111.
## 7 imp1 2017-08-07 00:22:00 111. 111. 111. 111. 111.
## 8 imp1 2017-08-07 00:24:00 111. 111. 111. 111. 111.
## 9 imp1 2017-08-07 00:38:00 111. 111. 111. 111. 111.
## 10 imp1 2017-08-07 00:39:00 111. 111. 111. 111. 111.
## # ... with 3,548 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Per imputation (`.id`): mean squared error of each imputed ask price
# against the complete series, the averages of those errors, and the share
# of rows flagged as inconsistent.
# The reference has to be `data_tm1`, the series `data_tm1_NA` was built
# from. Measuring against `data_m1` (another sample at another price level)
# reports the level gap between the two samples instead of the imputation
# error.
# NOTE: the table rendered below this chunk was produced with the old
# `data_m1` reference and must be regenerated.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above.
data_tm1_65_amelia <- data_tm1_65_amelia %>%
  ddply(
    .(.id), summarise,
    AskOpen    = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow     = mean((AskLow - data_tm1$AskLow)^2),
    AskClose   = mean((AskClose - data_tm1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_tm1_65_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 48.06753 | 48.03945 | 48.09411 | 48.06812 | 48.06723 | 48.06730 | 0.3519656 | 0.3028256 | 0.2714988 | 0.2542998 |
imp2 | 48.06781 | 48.03969 | 48.09423 | 48.06629 | 48.06673 | 48.06700 | 0.3347666 | 0.2819410 | 0.2813268 | 0.2647420 |
imp3 | 48.06148 | 48.03451 | 48.08888 | 48.06000 | 48.06113 | 48.06122 | 0.3132678 | 0.2727273 | 0.2880835 | 0.2788698 |
imp4 | 48.06864 | 48.03979 | 48.09638 | 48.06667 | 48.06761 | 48.06787 | 0.3464373 | 0.2929975 | 0.3015971 | 0.2745700 |
imp5 | 48.06606 | 48.03924 | 48.08798 | 48.06334 | 48.06352 | 48.06416 | 0.3335381 | 0.2899263 | 0.2506143 | 0.2555283 |
以下使用 `tidyr::fill()` 弥补 `data_tm1` 中65%的数据缺失值。
# Carry the last observed value forward, then fill whatever is still missing
# at the start of the series from the next observed value.
data_tm1_65_tidyr <- data_tm1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down'
  ) %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up'
  )

# Check whether any missing value is left.
anyNA(data_tm1_65_tidyr)
## [1] FALSE
# Flag rows whose ask prices break the OHLC ordering
# (1 = inconsistent, 0 = consistent).
data_tm1_65_tidyr <- data_tm1_65_tidyr %>%
  mutate(
    bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high  = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low   = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )

# Show every row carrying at least one flag.
data_tm1_65_tidyr %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 804 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:02:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:03:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:06:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:07:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 00:08:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 00:09:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 00:11:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 00:22:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 00:24:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 00:38:00 111. 111. 111. 111. 111. 111.
## # ... with 794 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Mean squared error of each filled ask price against the complete series,
# the averages of those errors, and the share of rows flagged as
# inconsistent.
# The reference has to be `data_tm1`, the series `data_tm1_NA` was built
# from. Measuring against `data_m1` (another sample at another price level)
# reports the level gap between the two samples instead of the imputation
# error.
# NOTE: the table rendered below this chunk was produced with the old
# `data_m1` reference and must be regenerated.
# Mean.HLC / Mean.OHLC are built from the MSE values computed just above.
data_tm1_65_tidyr <- data_tm1_65_tidyr %>%
  summarise(
    AskOpen    = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh    = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow     = mean((AskLow - data_tm1$AskLow)^2),
    AskClose   = mean((AskClose - data_tm1$AskClose)^2),
    Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open  = sum(bias.open) / length(bias.open),
    bias.high  = sum(bias.high) / length(bias.high),
    bias.low   = sum(bias.low) / length(bias.low),
    bias.close = sum(bias.close) / length(bias.close)
  ) %>%
  tbl_df

# Render the summary as a styled, horizontally scrollable table.
data_tm1_65_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # height = '400px' intentionally left off
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
48.06478 | 48.0462 | 48.08847 | 48.06434 | 48.06634 | 48.06595 | 0.3544226 | 0.3138821 | 0.294226 | 0.3445946 |
以下使用imputeTS::na.seadec()
弥补70%数据缺失值。
# Knock out 70% of the bid/ask OHLC cells with prodNA(), then put the first
# column of data_m1 back in front; it is excluded from prodNA() so it never
# receives NAs.
# NOTE(review): no seed is set in this chunk, so the NA pattern may differ
# between runs unless one is set earlier in the file.
data_m1_NA <- data_m1 %>%
  dplyr::select(BidOpen, BidHigh, BidLow, BidClose,
                AskOpen, AskHigh, AskLow, AskClose) %>%
  prodNA(noNA = 0.7) %>%
  cbind(data_m1[1], .) %>%
  as_tibble()  # as_tibble() replaces the deprecated tbl_df()
# Impute each price column with na.seadec(), once per algorithm listed in
# `algo`; every run yields one tibble of imputed Ask/Bid columns.
# as_tibble() replaces the deprecated as.tibble() / tbl_df().
data_m1_70_impTS <- llply(algo, function(x) {
  data_m1_NA %>%
    dplyr::select(starts_with('Ask'), starts_with('Bid')) %>%
    map(na.seadec, algorithm = x) %>%
    as_tibble()
})
names(data_m1_70_impTS) <- algo
# Stack the per-algorithm results; ldply() records the list name in `.id`.
data_m1_70_impTS <- data_m1_70_impTS %>%
  ldply() %>%
  as_tibble()
# Flag (1 = yes, 0 = no) rows whose imputed Ask prices are inconsistent:
# open/close outside [low, high], high below another price, low above one.
data_m1_70_impTS <- data_m1_70_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Show the rows that carry at least one flag.
data_m1_70_impTS %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 26,788 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 2 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 3 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 4 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 5 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 6 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 7 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 8 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 9 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 10 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## # ... with 26,778 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# One accuracy row per algorithm (`.id`).
#   AskOpen .. AskClose : MSE of each imputed Ask column against data_m1
#   Mean.HLC, Mean.OHLC : averages of those MSEs (arguments are evaluated in
#                         order, so by then the names hold the MSEs)
#   bias.*              : share of rows flagged as inconsistent
# as_tibble() replaces the deprecated tbl_df().
data_m1_70_impTS <- data_m1_70_impTS %>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
        AskLow = mean((AskLow - data_m1$AskLow)^2),
        AskClose = mean((AskClose - data_m1$AskClose)^2),
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = mean(bias.open),
        bias.high = mean(bias.high),
        bias.low = mean(bias.low),
        bias.close = mean(bias.close)) %>%
  as_tibble()
# Render as a styled, horizontally scrollable table (height left unrestricted).
data_m1_70_impTS %>%
  kable(caption = 'MSE 70% 缺失值') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 0.0001618 | 0.0001265 | 0.0001262 | 0.0001725 | 0.0001417 | 0.0001468 | 0.3927822 | 0.3453630 | 0.3225626 | 0.3881662 |
kalman | 0.0001618 | 0.0001265 | 0.0001262 | 0.0001725 | 0.0001417 | 0.0001468 | 0.3959994 | 0.3457826 | 0.3231221 | 0.3883061 |
locf | 0.0003904 | 0.0003374 | 0.0003273 | 0.0004098 | 0.0003581 | 0.0003662 | 0.4253742 | 0.3855085 | 0.3597706 | 0.4227165 |
ma | 0.0001928 | 0.0001537 | 0.0001548 | 0.0001982 | 0.0001689 | 0.0001749 | 0.4151630 | 0.3650860 | 0.3400476 | 0.4130648 |
mean | 0.0434017 | 0.0423406 | 0.0426192 | 0.0426670 | 0.0425423 | 0.0427571 | 0.4084487 | 0.3775353 | 0.3564135 | 0.4071898 |
random | 0.1551202 | 0.0523911 | 0.1117988 | 0.2454679 | 0.1365526 | 0.1411945 | 0.8742481 | 0.8708910 | 0.7637432 | 0.8887956 |
以下使用Amelia::amelia()
弥补70%数据缺失值。
# Multiple imputation with amelia() at its default settings, then stack the
# imputed data sets; ldply() stores each list name (imp1, imp2, ...) in `.id`.
# as_tibble() replaces the deprecated tbl_df().
data_m1_70_amelia <- data_m1_NA %>%
  amelia() %>%
  .$imputations %>%
  ldply() %>%
  as_tibble()
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
##
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
# Confirm that no NA survived the imputation.
anyNA(data_m1_70_amelia)
## [1] FALSE
# Flag (1 = yes, 0 = no) rows whose imputed Ask prices are inconsistent:
# open/close outside [low, high], high below another price, low above one.
data_m1_70_amelia <- mutate(
  data_m1_70_amelia,
  bias.open = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
  bias.high = as.numeric(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose),
  bias.low = as.numeric(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose),
  bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow)
)
# Show the rows that carry at least one flag.
dplyr::filter(
  data_m1_70_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 20,129 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2016-10-17 00:00:00 104. 104. 104. 104. 104.
## 2 imp1 2016-10-17 00:01:00 104. 104. 104. 104. 104.
## 3 imp1 2016-10-17 00:03:00 104. 104. 104. 104. 104.
## 4 imp1 2016-10-17 00:04:00 104. 104. 104. 104. 104.
## 5 imp1 2016-10-17 00:06:00 104. 104. 104. 104. 104.
## 6 imp1 2016-10-17 00:08:00 104. 104. 104. 104. 104.
## 7 imp1 2016-10-17 00:09:00 104. 104. 104. 104. 104.
## 8 imp1 2016-10-17 00:10:00 104. 104. 104. 104. 104.
## 9 imp1 2016-10-17 00:12:00 104. 104. 104. 104. 104.
## 10 imp1 2016-10-17 00:14:00 104. 104. 104. 104. 104.
## # ... with 20,119 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# One accuracy row per imputed data set (`.id`).
#   AskOpen .. AskClose : MSE of each imputed Ask column against data_m1
#   Mean.HLC, Mean.OHLC : averages of those MSEs (arguments are evaluated in
#                         order, so by then the names hold the MSEs)
#   bias.*              : share of rows flagged as inconsistent
# as_tibble() replaces the deprecated tbl_df().
data_m1_70_amelia <- data_m1_70_amelia %>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
        AskLow = mean((AskLow - data_m1$AskLow)^2),
        AskClose = mean((AskClose - data_m1$AskClose)^2),
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = mean(bias.open),
        bias.high = mean(bias.high),
        bias.low = mean(bias.low),
        bias.close = mean(bias.close)) %>%
  as_tibble()
# Render as a styled, horizontally scrollable table (height left unrestricted).
data_m1_70_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 0.0065354 | 0.0065133 | 0.0065083 | 0.0064779 | 0.0064999 | 0.0065087 | 0.4273325 | 0.3474612 | 0.3517975 | 0.3743181 |
imp2 | 0.0058556 | 0.0058747 | 0.0058369 | 0.0058674 | 0.0058597 | 0.0058586 | 0.4127850 | 0.3397678 | 0.3446636 | 0.3655057 |
imp3 | 0.0063261 | 0.0062797 | 0.0063244 | 0.0063016 | 0.0063019 | 0.0063080 | 0.4242551 | 0.3555742 | 0.3613093 | 0.3979578 |
imp4 | 0.0062205 | 0.0061725 | 0.0061818 | 0.0061766 | 0.0061770 | 0.0061879 | 0.4052315 | 0.3361309 | 0.3291369 | 0.3536159 |
imp5 | 0.0063617 | 0.0063084 | 0.0063286 | 0.0063186 | 0.0063185 | 0.0063293 | 0.4506924 | 0.3659253 | 0.3615890 | 0.3909638 |
以下使用tidyr::fill()
弥补70%数据缺失值。
# Impute by plain carry-over: the first fill() pushes the last seen value down
# each bid/ask OHLC column (its default direction); the second pass, with
# .direction = 'up', pulls the next value back into whatever is still NA.
data_m1_70_tidyr <- fill(data_m1_NA, BidOpen, BidHigh, BidLow, BidClose,
                         AskOpen, AskHigh, AskLow, AskClose)
data_m1_70_tidyr <- fill(data_m1_70_tidyr, BidOpen, BidHigh, BidLow, BidClose,
                         AskOpen, AskHigh, AskLow, AskClose, .direction = 'up')
anyNA(data_m1_70_tidyr)
## [1] FALSE
# Flag (1 = yes, 0 = no) rows whose filled Ask prices are inconsistent:
# open/close outside [low, high], high below another price, low above one.
data_m1_70_tidyr <- mutate(
  data_m1_70_tidyr,
  bias.open = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
  bias.high = as.numeric(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose),
  bias.low = as.numeric(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose),
  bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow)
)
# Show the rows that carry at least one flag.
dplyr::filter(
  data_m1_70_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 4,143 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:00:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:01:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 00:03:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 00:04:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 00:05:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 00:06:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 00:07:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 00:08:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 00:09:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 00:10:00 104. 104. 104. 104. 104. 104.
## # ... with 4,133 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Collapse the filled series into one accuracy row.
#   AskOpen .. AskClose : MSE of each filled Ask column against data_m1
#   Mean.HLC, Mean.OHLC : averages of those MSEs (summarise() evaluates its
#                         arguments in order, so by then the names hold the MSEs)
#   bias.*              : share of rows flagged as inconsistent
# as_tibble() replaces the deprecated tbl_df().
data_m1_70_tidyr <- data_m1_70_tidyr %>%
  summarise(
    AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow = mean((AskLow - data_m1$AskLow)^2),
    AskClose = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = mean(bias.open),
    bias.high = mean(bias.high),
    bias.low = mean(bias.low),
    bias.close = mean(bias.close)
  ) %>%
  as_tibble()
# Render as a styled, horizontally scrollable table (height left unrestricted).
data_m1_70_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
0.0003904 | 0.0003374 | 0.0003273 | 0.0004098 | 0.0003581 | 0.0003662 | 0.4253742 | 0.3855085 | 0.3597706 | 0.4227165 |
以下使用imputeTS::na.seadec()
弥补70%数据缺失值。
# Knock out 70% of the bid/ask OHLC cells with prodNA(), then put the first
# column of data_tm1 back in front; it is excluded from prodNA() so it never
# receives NAs.
# NOTE(review): no seed is set in this chunk, so the NA pattern may differ
# between runs unless one is set earlier in the file.
data_tm1_NA <- data_tm1 %>%
  dplyr::select(BidOpen, BidHigh, BidLow, BidClose,
                AskOpen, AskHigh, AskLow, AskClose) %>%
  prodNA(noNA = 0.7) %>%
  cbind(data_tm1[1], .) %>%
  as_tibble()  # as_tibble() replaces the deprecated tbl_df()
# Impute each price column with na.seadec(), once per algorithm listed in
# `algo`; every run yields one tibble of imputed Ask/Bid columns.
# as_tibble() replaces the deprecated as.tibble() / tbl_df().
data_tm1_70_impTS <- llply(algo, function(x) {
  data_tm1_NA %>%
    dplyr::select(starts_with('Ask'), starts_with('Bid')) %>%
    map(na.seadec, algorithm = x) %>%
    as_tibble()
})
names(data_tm1_70_impTS) <- algo
# Stack the per-algorithm results; ldply() records the list name in `.id`.
data_tm1_70_impTS <- data_tm1_70_impTS %>%
  ldply() %>%
  as_tibble()
# Flag (1 = yes, 0 = no) rows whose imputed Ask prices are inconsistent:
# open/close outside [low, high], high below another price, low above one.
data_tm1_70_impTS <- data_tm1_70_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Show the rows that carry at least one flag.
data_tm1_70_impTS %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 6,299 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 2 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 3 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 4 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 5 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 6 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 7 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 8 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 9 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 10 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## # ... with 6,289 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# One accuracy row per algorithm (`.id`).
#   AskOpen .. AskClose : MSE of each imputed Ask column against the observed one
#   Mean.HLC, Mean.OHLC : averages of those MSEs (arguments are evaluated in
#                         order, so by then the names hold the MSEs)
#   bias.*              : share of rows flagged as inconsistent
# Fix: the reference series is data_tm1 (the source of data_tm1_NA), not
# data_m1. Comparing against data_m1 silently recycled a vector of a different
# length and measured the gap between two unrelated price series instead of
# the imputation error.
stopifnot(all(table(data_tm1_70_impTS$.id) == nrow(data_tm1)))
data_tm1_70_impTS <- data_tm1_70_impTS %>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = mean(bias.open),
        bias.high = mean(bias.high),
        bias.low = mean(bias.low),
        bias.close = mean(bias.close)) %>%
  as_tibble()
# Render as a styled, horizontally scrollable table (height left unrestricted).
data_tm1_70_impTS %>%
  kable(caption = 'MSE 70% 缺失值') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 48.05448 | 48.02946 | 48.08666 | 48.07724 | 48.06445 | 48.06196 | 0.3421376 | 0.3243243 | 0.2616708 | 0.3452088 |
kalman | 48.05440 | 48.02955 | 48.08650 | 48.07724 | 48.06443 | 48.06192 | 0.3415233 | 0.3212531 | 0.2653563 | 0.3445946 |
locf | 48.05579 | 48.03106 | 48.07434 | 48.08234 | 48.06258 | 48.06088 | 0.4029484 | 0.4004914 | 0.3003686 | 0.4054054 |
ma | 48.05462 | 48.02955 | 48.08596 | 48.07753 | 48.06435 | 48.06191 | 0.3617936 | 0.3445946 | 0.2856265 | 0.3703931 |
mean | 48.12663 | 48.00807 | 48.08176 | 48.06426 | 48.05137 | 48.07018 | 0.8335381 | 0.7665848 | 0.3243243 | 0.3863636 |
random | 47.68556 | 47.12402 | 47.95719 | 49.26794 | 48.11638 | 48.00868 | 0.9324324 | 0.9262899 | 0.9146192 | 0.9275184 |
以下使用Amelia::amelia()
弥补70%数据缺失值。
# Multiple imputation with amelia() at its default settings, then stack the
# imputed data sets; ldply() stores each list name (imp1, imp2, ...) in `.id`.
# as_tibble() replaces the deprecated tbl_df().
data_tm1_70_amelia <- data_tm1_NA %>%
  amelia() %>%
  .$imputations %>%
  ldply() %>%
  as_tibble()
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
# Confirm that no NA survived the imputation.
anyNA(data_tm1_70_amelia)
## [1] FALSE
# Flag (1 = yes, 0 = no) rows whose imputed Ask prices are inconsistent:
# open/close outside [low, high], high below another price, low above one.
data_tm1_70_amelia <- mutate(
  data_tm1_70_amelia,
  bias.open = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
  bias.high = as.numeric(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose),
  bias.low = as.numeric(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose),
  bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow)
)
# Show the rows that carry at least one flag.
dplyr::filter(
  data_tm1_70_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 3,697 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2017-08-07 00:02:00 111. 111. 111. 111. 111.
## 2 imp1 2017-08-07 00:03:00 111. 111. 111. 111. 111.
## 3 imp1 2017-08-07 00:04:00 111. 111. 111. 111. 111.
## 4 imp1 2017-08-07 00:05:00 111. 111. 111. 111. 111.
## 5 imp1 2017-08-07 00:06:00 111. 111. 111. 111. 111.
## 6 imp1 2017-08-07 00:08:00 111. 111. 111. 111. 111.
## 7 imp1 2017-08-07 00:18:00 111. 111. 111. 111. 111.
## 8 imp1 2017-08-07 00:22:00 111. 111. 111. 111. 111.
## 9 imp1 2017-08-07 00:39:00 111. 111. 111. 111. 111.
## 10 imp1 2017-08-07 00:44:00 111. 111. 111. 111. 111.
## # ... with 3,687 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# One accuracy row per imputed data set (`.id`).
#   AskOpen .. AskClose : MSE of each imputed Ask column against the observed one
#   Mean.HLC, Mean.OHLC : averages of those MSEs (arguments are evaluated in
#                         order, so by then the names hold the MSEs)
#   bias.*              : share of rows flagged as inconsistent
# Fix: the reference series is data_tm1, not data_m1. Comparing against
# data_m1 silently recycled a vector of a different length and measured the
# gap between two unrelated price series instead of the imputation error.
# NOTE(review): assumes data_tm1_NA was derived row-for-row from data_tm1.
stopifnot(all(table(data_tm1_70_amelia$.id) == nrow(data_tm1)))
data_tm1_70_amelia <- data_tm1_70_amelia %>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = mean(bias.open),
        bias.high = mean(bias.high),
        bias.low = mean(bias.low),
        bias.close = mean(bias.close)) %>%
  as_tibble()
# Render as a styled, horizontally scrollable table (height left unrestricted).
data_tm1_70_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 48.06299 | 48.03345 | 48.09202 | 48.06911 | 48.06486 | 48.06439 | 0.3378378 | 0.3015971 | 0.2702703 | 0.2850123 |
imp2 | 48.06721 | 48.03401 | 48.10299 | 48.07360 | 48.07020 | 48.06945 | 0.3310811 | 0.3065111 | 0.2788698 | 0.3003686 |
imp3 | 48.05517 | 48.02155 | 48.08947 | 48.06446 | 48.05849 | 48.05766 | 0.3187961 | 0.3175676 | 0.2911548 | 0.3402948 |
imp4 | 48.05450 | 48.03275 | 48.09509 | 48.06249 | 48.06344 | 48.06121 | 0.3243243 | 0.2628993 | 0.2807125 | 0.2585995 |
imp5 | 48.06417 | 48.04374 | 48.09782 | 48.08140 | 48.07432 | 48.07178 | 0.2997543 | 0.2972973 | 0.2899263 | 0.3230958 |
以下使用tidyr::fill()
弥补70%数据缺失值。
# Impute by plain carry-over: the first fill() pushes the last seen value down
# each bid/ask OHLC column (its default direction); the second pass, with
# .direction = 'up', pulls the next value back into whatever is still NA.
data_tm1_70_tidyr <- fill(data_tm1_NA, BidOpen, BidHigh, BidLow, BidClose,
                          AskOpen, AskHigh, AskLow, AskClose)
data_tm1_70_tidyr <- fill(data_tm1_70_tidyr, BidOpen, BidHigh, BidLow, BidClose,
                          AskOpen, AskHigh, AskLow, AskClose, .direction = 'up')
anyNA(data_tm1_70_tidyr)
## [1] FALSE
# Flag (1 = yes, 0 = no) rows whose filled Ask prices are inconsistent:
# open/close outside [low, high], high below another price, low above one.
data_tm1_70_tidyr <- mutate(
  data_tm1_70_tidyr,
  bias.open = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
  bias.high = as.numeric(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose),
  bias.low = as.numeric(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose),
  bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow)
)
# Show the rows that carry at least one flag.
dplyr::filter(
  data_tm1_70_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 905 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:04:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:05:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:06:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:07:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 00:08:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 00:17:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 00:18:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 00:22:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 00:24:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 00:38:00 111. 111. 111. 111. 111. 111.
## # ... with 895 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Collapse the filled series into one accuracy row.
#   AskOpen .. AskClose : MSE of each filled Ask column against the observed one
#   Mean.HLC, Mean.OHLC : averages of those MSEs (summarise() evaluates its
#                         arguments in order, so by then the names hold the MSEs)
#   bias.*              : share of rows flagged as inconsistent
# Fix: the reference series is data_tm1, not data_m1. Comparing against
# data_m1 silently recycled a vector of a different length and measured the
# gap between two unrelated price series instead of the imputation error.
# NOTE(review): assumes data_tm1_NA was derived row-for-row from data_tm1.
stopifnot(nrow(data_tm1_70_tidyr) == nrow(data_tm1))
data_tm1_70_tidyr <- data_tm1_70_tidyr %>%
  summarise(
    AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow = mean((AskLow - data_tm1$AskLow)^2),
    AskClose = mean((AskClose - data_tm1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = mean(bias.open),
    bias.high = mean(bias.high),
    bias.low = mean(bias.low),
    bias.close = mean(bias.close)
  ) %>%
  as_tibble()
# Render as a styled, horizontally scrollable table (height left unrestricted).
data_tm1_70_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
48.05579 | 48.03106 | 48.07434 | 48.08234 | 48.06258 | 48.06088 | 0.4029484 | 0.4004914 | 0.3003686 | 0.4054054 |
以下使用imputeTS::na.seadec()
弥补80%数据缺失值。
# Knock out 80% of the bid/ask OHLC cells with prodNA(), then put the first
# column of data_m1 back in front; it is excluded from prodNA() so it never
# receives NAs.
# NOTE(review): no seed is set in this chunk, so the NA pattern may differ
# between runs unless one is set earlier in the file.
data_m1_NA <- data_m1 %>%
  dplyr::select(BidOpen, BidHigh, BidLow, BidClose,
                AskOpen, AskHigh, AskLow, AskClose) %>%
  prodNA(noNA = 0.8) %>%
  cbind(data_m1[1], .) %>%
  as_tibble()  # as_tibble() replaces the deprecated tbl_df()
# Impute each price column with na.seadec(), once per algorithm listed in
# `algo`; every run yields one tibble of imputed Ask/Bid columns.
# as_tibble() replaces the deprecated as.tibble() / tbl_df().
data_m1_80_impTS <- llply(algo, function(x) {
  data_m1_NA %>%
    dplyr::select(starts_with('Ask'), starts_with('Bid')) %>%
    map(na.seadec, algorithm = x) %>%
    as_tibble()
})
names(data_m1_80_impTS) <- algo
# Stack the per-algorithm results; ldply() records the list name in `.id`.
data_m1_80_impTS <- data_m1_80_impTS %>%
  ldply() %>%
  as_tibble()
# Flag (1 = yes, 0 = no) rows whose imputed Ask prices are inconsistent:
# open/close outside [low, high], high below another price, low above one.
data_m1_80_impTS <- data_m1_80_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Show the rows that carry at least one flag.
data_m1_80_impTS %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 28,397 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 2 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 3 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 4 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 5 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 6 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 7 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 8 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 9 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 10 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## # ... with 28,387 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# One accuracy row per algorithm (`.id`).
#   AskOpen .. AskClose : MSE of each imputed Ask column against data_m1
#   Mean.HLC, Mean.OHLC : averages of those MSEs (arguments are evaluated in
#                         order, so by then the names hold the MSEs)
#   bias.*              : share of rows flagged as inconsistent
# as_tibble() replaces the deprecated tbl_df().
data_m1_80_impTS <- data_m1_80_impTS %>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
        AskLow = mean((AskLow - data_m1$AskLow)^2),
        AskClose = mean((AskClose - data_m1$AskClose)^2),
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = mean(bias.open),
        bias.high = mean(bias.high),
        bias.low = mean(bias.low),
        bias.close = mean(bias.close)) %>%
  as_tibble()
# Render as a styled, horizontally scrollable table (height left unrestricted).
data_m1_80_impTS %>%
  kable(caption = 'MSE 80% 缺失值') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 0.0002653 | 0.0002080 | 0.0002403 | 0.0002507 | 0.0002330 | 0.0002411 | 0.4865016 | 0.4063505 | 0.4062107 | 0.4692964 |
kalman | 0.0002647 | 0.0002080 | 0.0002403 | 0.0002511 | 0.0002331 | 0.0002410 | 0.4876206 | 0.4055113 | 0.4049517 | 0.4662191 |
locf | 0.0007388 | 0.0005612 | 0.0005608 | 0.0006582 | 0.0005934 | 0.0006297 | 0.5302840 | 0.4497132 | 0.4574066 | 0.5218912 |
ma | 0.0003359 | 0.0002728 | 0.0002943 | 0.0003105 | 0.0002925 | 0.0003034 | 0.5045461 | 0.4288712 | 0.4227165 | 0.4869212 |
mean | 0.0486525 | 0.0484767 | 0.0482804 | 0.0487259 | 0.0484943 | 0.0485339 | 0.2989229 | 0.2805987 | 0.2605959 | 0.2943069 |
random | 0.0967558 | 0.0565147 | 0.1428038 | 0.0580634 | 0.0857940 | 0.0885344 | 0.8461323 | 0.9290810 | 0.1155406 | 0.8341027 |
以下使用Amelia::amelia()
弥补80%数据缺失值。
# Multiple imputation with amelia() at its default settings, then stack the
# imputed data sets; ldply() stores each list name (imp1, imp2, ...) in `.id`.
# as_tibble() replaces the deprecated tbl_df().
data_m1_80_amelia <- data_m1_NA %>%
  amelia() %>%
  .$imputations %>%
  ldply() %>%
  as_tibble()
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74
# Confirm that no NA survived the imputation.
anyNA(data_m1_80_amelia)
## [1] FALSE
# Flag (1 = yes, 0 = no) rows whose imputed Ask prices are inconsistent:
# open/close outside [low, high], high below another price, low above one.
data_m1_80_amelia <- mutate(
  data_m1_80_amelia,
  bias.open = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
  bias.high = as.numeric(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose),
  bias.low = as.numeric(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose),
  bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow)
)
# Show the rows that carry at least one flag.
dplyr::filter(
  data_m1_80_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 22,084 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2016-10-17 00:01:00 104. 104. 104. 104. 104.
## 2 imp1 2016-10-17 00:02:00 104. 104. 104. 104. 104.
## 3 imp1 2016-10-17 00:03:00 104. 104. 104. 104. 104.
## 4 imp1 2016-10-17 00:04:00 104. 104. 104. 104. 104.
## 5 imp1 2016-10-17 00:06:00 104. 104. 104. 104. 104.
## 6 imp1 2016-10-17 00:07:00 104. 104. 104. 104. 104.
## 7 imp1 2016-10-17 00:09:00 104. 104. 104. 104. 104.
## 8 imp1 2016-10-17 00:10:00 104. 104. 104. 104. 104.
## 9 imp1 2016-10-17 00:12:00 104. 104. 104. 104. 104.
## 10 imp1 2016-10-17 00:15:00 104. 104. 104. 104. 104.
## # ... with 22,074 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# One accuracy row per imputed data set (`.id`).
#   AskOpen .. AskClose : MSE of each imputed Ask column against data_m1
#   Mean.HLC, Mean.OHLC : averages of those MSEs (arguments are evaluated in
#                         order, so by then the names hold the MSEs)
#   bias.*              : share of rows flagged as inconsistent
# as_tibble() replaces the deprecated tbl_df().
data_m1_80_amelia <- data_m1_80_amelia %>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
        AskLow = mean((AskLow - data_m1$AskLow)^2),
        AskClose = mean((AskClose - data_m1$AskClose)^2),
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = mean(bias.open),
        bias.high = mean(bias.high),
        bias.low = mean(bias.low),
        bias.close = mean(bias.close)) %>%
  as_tibble()
# Render as a styled, horizontally scrollable table (height left unrestricted).
data_m1_80_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 0.0179774 | 0.0179669 | 0.0179299 | 0.0179521 | 0.0179496 | 0.0179566 | 0.4511120 | 0.3912435 | 0.3925024 | 0.4497132 |
imp2 | 0.0181715 | 0.0182881 | 0.0182233 | 0.0182494 | 0.0182536 | 0.0182331 | 0.4567072 | 0.3929221 | 0.3660652 | 0.4357253 |
imp3 | 0.0188257 | 0.0189006 | 0.0189500 | 0.0188341 | 0.0188949 | 0.0188776 | 0.4779689 | 0.4242551 | 0.4165618 | 0.5053854 |
imp4 | 0.0178892 | 0.0178482 | 0.0178864 | 0.0178254 | 0.0178533 | 0.0178623 | 0.4648203 | 0.4056511 | 0.3899846 | 0.4616030 |
imp5 | 0.0171595 | 0.0171508 | 0.0172147 | 0.0171742 | 0.0171799 | 0.0171748 | 0.4348860 | 0.3748776 | 0.3765562 | 0.4386628 |
以下使用tidyr::fill()
弥补80%数据缺失值。
# Impute by plain carry-over: the first fill() pushes the last seen value down
# each bid/ask OHLC column (its default direction); the second pass, with
# .direction = 'up', pulls the next value back into whatever is still NA.
data_m1_80_tidyr <- fill(data_m1_NA, BidOpen, BidHigh, BidLow, BidClose,
                         AskOpen, AskHigh, AskLow, AskClose)
data_m1_80_tidyr <- fill(data_m1_80_tidyr, BidOpen, BidHigh, BidLow, BidClose,
                         AskOpen, AskHigh, AskLow, AskClose, .direction = 'up')
anyNA(data_m1_80_tidyr)
## [1] FALSE
# Flag (1 = yes, 0 = no) rows whose filled Ask prices are inconsistent:
# open/close outside [low, high], high below another price, low above one.
data_m1_80_tidyr <- mutate(
  data_m1_80_tidyr,
  bias.open = as.numeric(AskOpen > AskHigh | AskOpen < AskLow),
  bias.high = as.numeric(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose),
  bias.low = as.numeric(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose),
  bias.close = as.numeric(AskClose > AskHigh | AskClose < AskLow)
)
# Show the rows that carry at least one flag.
dplyr::filter(
  data_m1_80_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 4,865 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:00:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:01:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 00:02:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 00:03:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 00:04:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 00:10:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 00:17:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 00:18:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 00:19:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 00:24:00 104. 104. 104. 104. 104. 104.
## # ... with 4,855 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Collapse the filled series into one accuracy row.
#   AskOpen .. AskClose : MSE of each filled Ask column against data_m1
#   Mean.HLC, Mean.OHLC : averages of those MSEs (summarise() evaluates its
#                         arguments in order, so by then the names hold the MSEs)
#   bias.*              : share of rows flagged as inconsistent
# as_tibble() replaces the deprecated tbl_df().
data_m1_80_tidyr <- data_m1_80_tidyr %>%
  summarise(
    AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
    AskLow = mean((AskLow - data_m1$AskLow)^2),
    AskClose = mean((AskClose - data_m1$AskClose)^2),
    Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
    bias.open = mean(bias.open),
    bias.high = mean(bias.high),
    bias.low = mean(bias.low),
    bias.close = mean(bias.close)
  ) %>%
  as_tibble()
# Render as a styled, horizontally scrollable table (height left unrestricted).
data_m1_80_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
0.0007388 | 0.0005612 | 0.0005608 | 0.0006582 | 0.0005934 | 0.0006297 | 0.530284 | 0.4497132 | 0.4574066 | 0.5218912 |
以下使用imputeTS::na.seadec()
弥补80%数据缺失值。
# Knock out 80% of the bid/ask OHLC cells with prodNA(), then put the first
# column of data_tm1 back in front; it is excluded from prodNA() so it never
# receives NAs.
# NOTE(review): no seed is set in this chunk, so the NA pattern may differ
# between runs unless one is set earlier in the file.
data_tm1_NA <- data_tm1 %>%
  dplyr::select(BidOpen, BidHigh, BidLow, BidClose,
                AskOpen, AskHigh, AskLow, AskClose) %>%
  prodNA(noNA = 0.8) %>%
  cbind(data_tm1[1], .) %>%
  as_tibble()  # as_tibble() replaces the deprecated tbl_df()
# Impute each price column with na.seadec(), once per algorithm listed in
# `algo`; every run yields one tibble of imputed Ask/Bid columns.
# as_tibble() replaces the deprecated as.tibble() / tbl_df().
data_tm1_80_impTS <- llply(algo, function(x) {
  data_tm1_NA %>%
    dplyr::select(starts_with('Ask'), starts_with('Bid')) %>%
    map(na.seadec, algorithm = x) %>%
    as_tibble()
})
names(data_tm1_80_impTS) <- algo
# Stack the per-algorithm results; ldply() records the list name in `.id`.
data_tm1_80_impTS <- data_tm1_80_impTS %>%
  ldply() %>%
  as_tibble()
# Flag (1 = yes, 0 = no) rows whose imputed Ask prices are inconsistent:
# open/close outside [low, high], high below another price, low above one.
data_tm1_80_impTS <- data_tm1_80_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
  )
# Show the rows that carry at least one flag.
data_tm1_80_impTS %>%
  dplyr::filter(bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 6,887 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 2 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 3 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 4 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 5 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 6 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 7 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 8 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 9 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 10 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## # ... with 6,877 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# One accuracy row per algorithm (`.id`).
#   AskOpen .. AskClose : MSE of each imputed Ask column against the observed one
#   Mean.HLC, Mean.OHLC : averages of those MSEs (arguments are evaluated in
#                         order, so by then the names hold the MSEs)
#   bias.*              : share of rows flagged as inconsistent
# Fix: the reference series is data_tm1 (the source of data_tm1_NA), not
# data_m1. Comparing against data_m1 silently recycled a vector of a different
# length and measured the gap between two unrelated price series instead of
# the imputation error.
stopifnot(all(table(data_tm1_80_impTS$.id) == nrow(data_tm1)))
data_tm1_80_impTS <- data_tm1_80_impTS %>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
        bias.open = mean(bias.open),
        bias.high = mean(bias.high),
        bias.low = mean(bias.low),
        bias.close = mean(bias.close)) %>%
  as_tibble()
# Render as a styled, horizontally scrollable table (height left unrestricted).
data_tm1_80_impTS %>%
  kable(caption = 'MSE 80% 缺失值') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 48.06756 | 48.04682 | 48.09353 | 48.06823 | 48.06952 | 48.06903 | 0.4250614 | 0.3525799 | 0.3789926 | 0.4244472 |
kalman | 48.06761 | 48.04682 | 48.09357 | 48.06821 | 48.06953 | 48.06905 | 0.4256757 | 0.3519656 | 0.3820639 | 0.4256757 |
locf | 48.07033 | 48.03224 | 48.09007 | 48.06271 | 48.06167 | 48.06384 | 0.4932432 | 0.4404177 | 0.3912776 | 0.4551597 |
ma | 48.06795 | 48.04561 | 48.09361 | 48.06843 | 48.06922 | 48.06890 | 0.4398034 | 0.3808354 | 0.3863636 | 0.4391892 |
mean | 48.05587 | 48.05459 | 48.17520 | 48.12306 | 48.11762 | 48.10218 | 0.8802211 | 0.2708845 | 0.8157248 | 0.2954545 |
random | 49.44640 | 48.28223 | 49.41269 | 47.95915 | 48.55136 | 48.77512 | 0.9686732 | 0.9705160 | 0.9017199 | 0.9133907 |
以下使用Amelia::amelia()
弥补80%数据缺失值。
# Run amelia() with its default settings on data_tm1_NA and stack the imputed
# data sets it returns into one tibble; `.id` identifies the imputation
# (imp1, imp2, ... in the output shown below).
data_tm1_80_amelia <- tbl_df(ldply(amelia(data_tm1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
##
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
# Sanity check: TRUE would mean some values were left unimputed.
anyNA(data_tm1_80_amelia)
## [1] FALSE
# Flag rows whose imputed Ask prices break OHLC ordering (1 = violated,
# 0 = fine): open and close must lie within [low, high], high must not be
# below any other price and low must not be above any other price.
data_tm1_80_amelia <- data_tm1_80_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0))
# Print every row that fails at least one of the four checks.
dplyr::filter(
  data_tm1_80_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 4,328 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2017-08-07 00:03:00 111. 111. 111. 111. 111.
## 2 imp1 2017-08-07 00:04:00 111. 111. 111. 111. 111.
## 3 imp1 2017-08-07 00:06:00 111. 111. 111. 111. 111.
## 4 imp1 2017-08-07 00:39:00 111. 111. 111. 111. 111.
## 5 imp1 2017-08-07 00:41:00 111. 111. 111. 111. 111.
## 6 imp1 2017-08-07 00:44:00 111. 111. 111. 111. 111.
## 7 imp1 2017-08-07 00:52:00 111. 111. 111. 111. 111.
## 8 imp1 2017-08-07 00:55:00 111. 111. 111. 111. 111.
## 9 imp1 2017-08-07 00:56:00 111. 111. 111. 111. 111.
## 10 imp1 2017-08-07 00:57:00 111. 111. 111. 111. 111.
## # ... with 4,318 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Collapse to one row per Amelia imputation (`.id`): mean squared error of
# each imputed Ask column against the complete series, plus the share of rows
# flagged by each bias.* indicator.
#
# FIX: the imputed values come from data_tm1 (via data_tm1_NA), so the
# reference series must be data_tm1. The earlier revision subtracted data_m1,
# a different sample (prices near 104 instead of 111), which is why the table
# rendered below shows MSE values of about 48. Re-knit to refresh that table.
data_tm1_80_amelia <- tbl_df(ddply(
  data_tm1_80_amelia, .(.id), summarise,
  AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
  AskLow = mean((AskLow - data_tm1$AskLow)^2),
  AskClose = mean((AskClose - data_tm1$AskClose)^2),
  # These two use the MSE values computed just above (see the rendered
  # table), not the raw prices.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)))
# Render the summary as a striped, full-width scrollable HTML table.
kable(data_tm1_80_amelia, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # optional: height = '400px'
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 48.09547 | 48.06079 | 48.12047 | 48.08773 | 48.08966 | 48.09112 | 0.3599509 | 0.3396806 | 0.3065111 | 0.3476658 |
imp2 | 48.09635 | 48.03502 | 48.09513 | 48.06611 | 48.06542 | 48.07315 | 0.4809582 | 0.4379607 | 0.3550369 | 0.3574939 |
imp3 | 48.06543 | 48.03745 | 48.09131 | 48.05879 | 48.06252 | 48.06325 | 0.4778870 | 0.3384521 | 0.3673219 | 0.2868550 |
imp4 | 48.07939 | 48.04066 | 48.10873 | 48.06645 | 48.07195 | 48.07381 | 0.4662162 | 0.3765356 | 0.4041769 | 0.4011057 |
imp5 | 48.08769 | 48.05208 | 48.11440 | 48.07758 | 48.08135 | 48.08294 | 0.3980344 | 0.3470516 | 0.3200246 | 0.3304668 |
以下使用tidyr::fill()
弥补80%数据缺失值。
# Fill the gaps in all eight price columns with fill(): first carry the last
# observation forward ('down', the default direction), then fill whatever is
# still missing from the next observation ('up').
data_tm1_80_tidyr <- data_tm1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down') %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up')
# Sanity check: TRUE would mean some values were left unfilled.
anyNA(data_tm1_80_tidyr)
## [1] FALSE
# Flag rows whose filled Ask prices break OHLC ordering (1 = violated,
# 0 = fine): open and close must lie within [low, high], high must not be
# below any other price and low must not be above any other price.
data_tm1_80_tidyr <- data_tm1_80_tidyr %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0))
# Print every row that fails at least one of the four checks.
dplyr::filter(
  data_tm1_80_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 1,028 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:02:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:03:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:04:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:05:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 00:40:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 00:41:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 00:42:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 00:43:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 00:44:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 00:45:00 111. 111. 111. 111. 111. 111.
## # ... with 1,018 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Collapse to a single row: mean squared error of each filled Ask column
# against the complete series, plus the share of rows flagged by each bias.*
# indicator.
#
# FIX: the filled values come from data_tm1 (via data_tm1_NA), so the
# reference series must be data_tm1. The earlier revision subtracted data_m1,
# a different sample (prices near 104 instead of 111), which is why the table
# rendered below shows MSE values of about 48. Re-knit to refresh that table.
data_tm1_80_tidyr <- tbl_df(summarise(
  data_tm1_80_tidyr,
  AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
  AskLow = mean((AskLow - data_tm1$AskLow)^2),
  AskClose = mean((AskClose - data_tm1$AskClose)^2),
  # These two use the MSE values computed just above (see the rendered
  # table), not the raw prices.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)))
# Render the summary as a striped, full-width scrollable HTML table.
kable(data_tm1_80_tidyr, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # optional: height = '400px'
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
48.07033 | 48.03224 | 48.09007 | 48.06271 | 48.06167 | 48.06384 | 0.4932432 | 0.4404177 | 0.3912776 | 0.4551597 |
以下使用imputeTS::na.seadec()
弥补85%数据缺失值。
# Build the 85%-missing test set: blank out values in the eight price columns
# of data_m1 with prodNA(noNA = 0.85) and re-attach the untouched first
# column of data_m1 (the `index` timestamp in the output shown further down).
data_m1_NA <- tbl_df(cbind(
  data_m1[1],
  prodNA(
    dplyr::select(
      data_m1,
      BidOpen, BidHigh, BidLow, BidClose,
      AskOpen, AskHigh, AskLow, AskClose),
    noNA = 0.85)))
# Impute every Ask*/Bid* column with na.seadec(), once per algorithm listed in
# `algo`, then stack the per-algorithm results into a single tibble whose
# `.id` column carries the algorithm name.
data_m1_85_impTS <- llply(algo, function(method) {
  price_cols <- dplyr::select(
    data_m1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(price_cols, na.seadec, algorithm = method))
})
names(data_m1_85_impTS) <- algo
data_m1_85_impTS <- tbl_df(ldply(data_m1_85_impTS))
# Flag rows whose imputed Ask prices break OHLC ordering (1 = violated,
# 0 = fine): open and close must lie within [low, high], high must not be
# below any other price and low must not be above any other price.
data_m1_85_impTS <- data_m1_85_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0))
# Print every row that fails at least one of the four checks.
dplyr::filter(
  data_m1_85_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 29,379 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 2 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 3 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 4 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 5 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 6 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 7 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 8 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 9 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 10 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## # ... with 29,369 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Collapse to one row per imputation algorithm (`.id`): mean squared error of
# each imputed Ask column against the complete data_m1 series, plus the share
# of rows flagged by each bias.* indicator.
data_m1_85_impTS <- tbl_df(ddply(
  data_m1_85_impTS, .(.id), summarise,
  AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
  AskLow = mean((AskLow - data_m1$AskLow)^2),
  AskClose = mean((AskClose - data_m1$AskClose)^2),
  # These two use the MSE values computed just above (see the rendered
  # table), not the raw prices.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)))
# Render the summary as a striped, full-width scrollable HTML table.
kable(data_m1_85_impTS, caption = 'MSE 85% 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # optional: height = '400px'
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 0.0003568 | 0.0002415 | 0.0003425 | 0.0003855 | 0.0003232 | 0.0003316 | 0.5390964 | 0.4617429 | 0.4374038 | 0.5209120 |
kalman | 0.0003567 | 0.0002414 | 0.0003399 | 0.0003848 | 0.0003220 | 0.0003307 | 0.5336411 | 0.4581060 | 0.4311092 | 0.5150371 |
locf | 0.0009119 | 0.0007365 | 0.0009503 | 0.0008236 | 0.0008368 | 0.0008556 | 0.5592391 | 0.4825850 | 0.4667786 | 0.5409148 |
ma | 0.0004343 | 0.0003234 | 0.0004471 | 0.0004450 | 0.0004052 | 0.0004124 | 0.5718282 | 0.4860820 | 0.4788082 | 0.5623164 |
mean | 0.0521145 | 0.0515481 | 0.0514613 | 0.0510225 | 0.0513439 | 0.0515366 | 0.2410127 | 0.2432508 | 0.2106588 | 0.2477270 |
random | 0.3240766 | 0.0639116 | 0.0581029 | 0.0510604 | 0.0576916 | 0.1242879 | 0.9664289 | 0.1843614 | 0.9805567 | 0.9148133 |
以下使用Amelia::amelia()
弥补85%数据缺失值。
# Run amelia() with its default settings on data_m1_NA and stack the imputed
# data sets it returns into one tibble; `.id` identifies the imputation
# (imp1, imp2, ... in the output shown below).
data_m1_85_amelia <- tbl_df(ldply(amelia(data_m1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
##
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106
# Sanity check: TRUE would mean some values were left unimputed.
anyNA(data_m1_85_amelia)
## [1] FALSE
# Flag rows whose imputed Ask prices break OHLC ordering (1 = violated,
# 0 = fine): open and close must lie within [low, high], high must not be
# below any other price and low must not be above any other price.
data_m1_85_amelia <- data_m1_85_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0))
# Print every row that fails at least one of the four checks.
dplyr::filter(
  data_m1_85_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 24,613 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2016-10-17 00:00:00 104. 104. 104. 104. 104.
## 2 imp1 2016-10-17 00:01:00 104. 104. 104. 104. 104.
## 3 imp1 2016-10-17 00:02:00 104. 104. 104. 104. 104.
## 4 imp1 2016-10-17 00:03:00 104. 104. 104. 104. 104.
## 5 imp1 2016-10-17 00:05:00 104. 104. 104. 104. 104.
## 6 imp1 2016-10-17 00:06:00 104. 104. 104. 104. 104.
## 7 imp1 2016-10-17 00:08:00 104. 104. 104. 104. 104.
## 8 imp1 2016-10-17 00:10:00 104. 104. 104. 104. 104.
## 9 imp1 2016-10-17 00:11:00 104. 104. 104. 104. 104.
## 10 imp1 2016-10-17 00:12:00 104. 104. 104. 104. 104.
## # ... with 24,603 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Collapse to one row per Amelia imputation (`.id`): mean squared error of
# each imputed Ask column against the complete data_m1 series, plus the share
# of rows flagged by each bias.* indicator.
data_m1_85_amelia <- tbl_df(ddply(
  data_m1_85_amelia, .(.id), summarise,
  AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
  AskLow = mean((AskLow - data_m1$AskLow)^2),
  AskClose = mean((AskClose - data_m1$AskClose)^2),
  # These two use the MSE values computed just above (see the rendered
  # table), not the raw prices.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)))
# Render the summary as a striped, full-width scrollable HTML table.
kable(data_m1_85_amelia, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # optional: height = '400px'
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 0.0296854 | 0.0294646 | 0.0295183 | 0.0295090 | 0.0294973 | 0.0295443 | 0.5305637 | 0.4369842 | 0.4357253 | 0.5021681 |
imp2 | 0.0285124 | 0.0284944 | 0.0284871 | 0.0284606 | 0.0284807 | 0.0284886 | 0.5101413 | 0.4222968 | 0.4008952 | 0.4583858 |
imp3 | 0.0309386 | 0.0309092 | 0.0309865 | 0.0308879 | 0.0309279 | 0.0309305 | 0.5620366 | 0.4683172 | 0.4411806 | 0.5178347 |
imp4 | 0.0295915 | 0.0292050 | 0.0294749 | 0.0293755 | 0.0293518 | 0.0294117 | 0.5662330 | 0.4571269 | 0.4434187 | 0.5016086 |
imp5 | 0.0296229 | 0.0295257 | 0.0296607 | 0.0295922 | 0.0295929 | 0.0296004 | 0.5403553 | 0.4505525 | 0.4560078 | 0.5295846 |
以下使用tidyr::fill()
弥补85%数据缺失值。
# Fill the gaps in all eight price columns with fill(): first carry the last
# observation forward ('down', the default direction), then fill whatever is
# still missing from the next observation ('up').
data_m1_85_tidyr <- data_m1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down') %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up')
# Sanity check: TRUE would mean some values were left unfilled.
anyNA(data_m1_85_tidyr)
## [1] FALSE
# Flag rows whose filled Ask prices break OHLC ordering (1 = violated,
# 0 = fine): open and close must lie within [low, high], high must not be
# below any other price and low must not be above any other price.
data_m1_85_tidyr <- data_m1_85_tidyr %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0))
# Print every row that fails at least one of the four checks.
dplyr::filter(
  data_m1_85_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 4,980 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:00:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:01:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 00:02:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 00:03:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 00:18:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 00:19:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 00:20:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 00:21:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 00:22:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 00:23:00 104. 104. 104. 104. 104. 104.
## # ... with 4,970 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Collapse to a single row: mean squared error of each filled Ask column
# against the complete data_m1 series, plus the share of rows flagged by each
# bias.* indicator.
data_m1_85_tidyr <- tbl_df(summarise(
  data_m1_85_tidyr,
  AskOpen = mean((AskOpen - data_m1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_m1$AskHigh)^2),
  AskLow = mean((AskLow - data_m1$AskLow)^2),
  AskClose = mean((AskClose - data_m1$AskClose)^2),
  # These two use the MSE values computed just above (see the rendered
  # table), not the raw prices.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)))
# Render the summary as a striped, full-width scrollable HTML table.
kable(data_m1_85_tidyr, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # optional: height = '400px'
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
0.0009119 | 0.0007365 | 0.0009503 | 0.0008236 | 0.0008368 | 0.0008556 | 0.5592391 | 0.482585 | 0.4667786 | 0.5409148 |
以下使用imputeTS::na.seadec()
弥补85%数据缺失值。
# Build the 85%-missing test set: blank out values in the eight price columns
# of data_tm1 with prodNA(noNA = 0.85) and re-attach the untouched first
# column of data_tm1 (the `index` timestamp in the output shown further down).
data_tm1_NA <- tbl_df(cbind(
  data_tm1[1],
  prodNA(
    dplyr::select(
      data_tm1,
      BidOpen, BidHigh, BidLow, BidClose,
      AskOpen, AskHigh, AskLow, AskClose),
    noNA = 0.85)))
# Impute every Ask*/Bid* column with na.seadec(), once per algorithm listed in
# `algo`, then stack the per-algorithm results into a single tibble whose
# `.id` column carries the algorithm name.
data_tm1_85_impTS <- llply(algo, function(method) {
  price_cols <- dplyr::select(
    data_tm1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(price_cols, na.seadec, algorithm = method))
})
names(data_tm1_85_impTS) <- algo
data_tm1_85_impTS <- tbl_df(ldply(data_tm1_85_impTS))
# Flag rows whose imputed Ask prices break OHLC ordering (1 = violated,
# 0 = fine): open and close must lie within [low, high], high must not be
# below any other price and low must not be above any other price.
data_tm1_85_impTS <- data_tm1_85_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0))
# Print every row that fails at least one of the four checks.
dplyr::filter(
  data_tm1_85_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 6,434 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 2 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 3 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 4 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 5 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 6 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 7 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 8 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 9 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 10 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## # ... with 6,424 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
# Collapse to one row per imputation algorithm (`.id`): mean squared error of
# each imputed Ask column against the complete series, plus the share of rows
# flagged by each bias.* indicator.
#
# FIX: the imputed values come from data_tm1 (via data_tm1_NA), so the
# reference series must be data_tm1. The earlier revision subtracted data_m1,
# a different sample (prices near 104 instead of 111), which is why the table
# rendered below shows MSE values of about 48. Re-knit to refresh that table.
data_tm1_85_impTS <- tbl_df(ddply(
  data_tm1_85_impTS, .(.id), summarise,
  AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
  AskLow = mean((AskLow - data_tm1$AskLow)^2),
  AskClose = mean((AskClose - data_tm1$AskClose)^2),
  # These two use the MSE values computed just above (see the rendered
  # table), not the raw prices.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)))
# Render the summary as a striped, full-width scrollable HTML table.
kable(data_tm1_85_impTS, caption = 'MSE 85% 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # optional: height = '400px'
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 48.05584 | 48.04052 | 48.08479 | 48.06113 | 48.06214 | 48.06057 | 0.4410319 | 0.3660934 | 0.3900491 | 0.4606880 |
kalman | 48.05593 | 48.04052 | 48.08479 | 48.06086 | 48.06206 | 48.06052 | 0.4404177 | 0.3593366 | 0.3875921 | 0.4484029 |
locf | 48.06139 | 48.04274 | 48.09568 | 48.04604 | 48.06149 | 48.06146 | 0.5479115 | 0.4570025 | 0.5012285 | 0.5491400 |
ma | 48.05618 | 48.04110 | 48.08498 | 48.06091 | 48.06233 | 48.06079 | 0.5116708 | 0.4183047 | 0.4551597 | 0.5098280 |
mean | 48.11939 | 48.09357 | 48.10504 | 48.10364 | 48.10075 | 48.10541 | 0.2260442 | 0.1984029 | 0.2125307 | 0.2278870 |
random | 49.15168 | 47.72769 | 49.75232 | 47.47577 | 48.31860 | 48.52687 | 0.9895577 | 0.9889435 | 0.9631450 | 0.9649877 |
以下使用Amelia::amelia()
弥补85%数据缺失值。
# Run amelia() with its default settings on data_tm1_NA and stack the imputed
# data sets it returns into one tibble; `.id` identifies the imputation
# (imp1, imp2, ... in the output shown below).
data_tm1_85_amelia <- tbl_df(ldply(amelia(data_tm1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147
# Sanity check: TRUE would mean some values were left unimputed.
anyNA(data_tm1_85_amelia)
## [1] FALSE
# Flag rows whose imputed Ask prices break OHLC ordering (1 = violated,
# 0 = fine): open and close must lie within [low, high], high must not be
# below any other price and low must not be above any other price.
data_tm1_85_amelia <- data_tm1_85_amelia %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0))
# Print every row that fails at least one of the four checks.
dplyr::filter(
  data_tm1_85_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 4,380 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2017-08-07 00:02:00 111. 111. 111. 111. 111.
## 2 imp1 2017-08-07 00:03:00 111. 111. 111. 111. 111.
## 3 imp1 2017-08-07 00:06:00 111. 111. 111. 111. 111.
## 4 imp1 2017-08-07 00:07:00 111. 111. 111. 111. 111.
## 5 imp1 2017-08-07 00:08:00 111. 111. 111. 111. 111.
## 6 imp1 2017-08-07 00:09:00 111. 111. 111. 111. 111.
## 7 imp1 2017-08-07 00:11:00 111. 111. 111. 111. 111.
## 8 imp1 2017-08-07 00:17:00 111. 111. 111. 111. 111.
## 9 imp1 2017-08-07 00:18:00 111. 111. 111. 111. 111.
## 10 imp1 2017-08-07 00:24:00 111. 111. 111. 111. 111.
## # ... with 4,370 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
# Collapse to one row per Amelia imputation (`.id`): mean squared error of
# each imputed Ask column against the complete series, plus the share of rows
# flagged by each bias.* indicator.
#
# FIX: the imputed values come from data_tm1 (via data_tm1_NA), so the
# reference series must be data_tm1. The earlier revision subtracted data_m1,
# a different sample (prices near 104 instead of 111), which is why the table
# rendered below shows MSE values of about 48. Re-knit to refresh that table.
data_tm1_85_amelia <- tbl_df(ddply(
  data_tm1_85_amelia, .(.id), summarise,
  AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
  AskLow = mean((AskLow - data_tm1$AskLow)^2),
  AskClose = mean((AskClose - data_tm1$AskClose)^2),
  # These two use the MSE values computed just above (see the rendered
  # table), not the raw prices.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)))
# Render the summary as a striped, full-width scrollable HTML table.
kable(data_tm1_85_amelia, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # optional: height = '400px'
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 48.06422 | 48.06042 | 48.11247 | 48.11366 | 48.09552 | 48.08769 | 0.3765356 | 0.3568796 | 0.3427518 | 0.3820639 |
imp2 | 48.07333 | 48.06250 | 48.11384 | 48.10463 | 48.09366 | 48.08858 | 0.3562654 | 0.3427518 | 0.3200246 | 0.3839066 |
imp3 | 48.06908 | 48.06095 | 48.10012 | 48.10192 | 48.08767 | 48.08302 | 0.3568796 | 0.3396806 | 0.3218673 | 0.3488943 |
imp4 | 48.07092 | 48.06230 | 48.11810 | 48.08630 | 48.08890 | 48.08441 | 0.4459459 | 0.3200246 | 0.3495086 | 0.2696560 |
imp5 | 48.05452 | 48.04422 | 48.10806 | 48.09895 | 48.08375 | 48.07644 | 0.4299754 | 0.4103194 | 0.3869779 | 0.4434889 |
以下使用tidyr::fill()
弥补85%数据缺失值。
# Fill the gaps in all eight price columns with fill(): first carry the last
# observation forward ('down', the default direction), then fill whatever is
# still missing from the next observation ('up').
data_tm1_85_tidyr <- data_tm1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down') %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up')
# Sanity check: TRUE would mean some values were left unfilled.
anyNA(data_tm1_85_tidyr)
## [1] FALSE
# Flag rows whose filled Ask prices break OHLC ordering (1 = violated,
# 0 = fine): open and close must lie within [low, high], high must not be
# below any other price and low must not be above any other price.
data_tm1_85_tidyr <- data_tm1_85_tidyr %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0))
# Print every row that fails at least one of the four checks.
dplyr::filter(
  data_tm1_85_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 1,170 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:02:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:03:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:04:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:17:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 00:18:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 00:22:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 00:24:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 00:38:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 00:39:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 00:40:00 111. 111. 111. 111. 111. 111.
## # ... with 1,160 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
# Collapse to a single row: mean squared error of each filled Ask column
# against the complete series, plus the share of rows flagged by each bias.*
# indicator.
#
# FIX: the filled values come from data_tm1 (via data_tm1_NA), so the
# reference series must be data_tm1. The earlier revision subtracted data_m1,
# a different sample (prices near 104 instead of 111), which is why the table
# rendered below shows MSE values of about 48. Re-knit to refresh that table.
data_tm1_85_tidyr <- tbl_df(summarise(
  data_tm1_85_tidyr,
  AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
  AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
  AskLow = mean((AskLow - data_tm1$AskLow)^2),
  AskClose = mean((AskClose - data_tm1$AskClose)^2),
  # These two use the MSE values computed just above (see the rendered
  # table), not the raw prices.
  Mean.HLC = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open = sum(bias.open) / length(bias.open),
  bias.high = sum(bias.high) / length(bias.high),
  bias.low = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)))
# Render the summary as a striped, full-width scrollable HTML table.
kable(data_tm1_85_tidyr, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')  # optional: height = '400px'
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
48.06139 | 48.04274 | 48.09568 | 48.04604 | 48.06149 | 48.06146 | 0.5479115 | 0.4570025 | 0.5012285 | 0.54914 |
以下使用imputeTS::na.seadec()
弥补90%数据缺失值。
# Build the 90%-missing test set: blank out values in the eight price columns
# of data_m1 with prodNA(noNA = 0.9) and re-attach the untouched first column
# of data_m1.
data_m1_NA <- tbl_df(cbind(
  data_m1[1],
  prodNA(
    dplyr::select(
      data_m1,
      BidOpen, BidHigh, BidLow, BidClose,
      AskOpen, AskHigh, AskLow, AskClose),
    noNA = 0.9)))
# Impute every Ask*/Bid* column with na.seadec(), once per algorithm listed in
# `algo`, then stack the per-algorithm results into a single tibble whose
# `.id` column carries the algorithm name.
data_m1_90_impTS <- llply(algo, function(method) {
  price_cols <- dplyr::select(
    data_m1_NA, starts_with('Ask'), starts_with('Bid'))
  as.tibble(map(price_cols, na.seadec, algorithm = method))
})
names(data_m1_90_impTS) <- algo
data_m1_90_impTS <- tbl_df(ldply(data_m1_90_impTS))
# Flag rows whose imputed Ask prices break OHLC ordering (1 = violated,
# 0 = fine): open and close must lie within [low, high], high must not be
# below any other price and low must not be above any other price.
data_m1_90_impTS <- data_m1_90_impTS %>%
  mutate(
    bias.open = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
    bias.high = if_else(
      AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0),
    bias.low = if_else(
      AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0),
    bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0))
# Print every row that fails at least one of the four checks.
dplyr::filter(
  data_m1_90_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1)
## # A tibble: 35,473 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 2 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 3 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 4 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 5 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 6 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 7 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 8 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 9 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## 10 inte~ 104. 104. 104. 104. 104. 104. 104. 104.
## # ... with 35,463 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
## Reduce each imputation method (.id) to one row: the MSE of every Ask column
## against the complete data_m1 series, the averages of those MSEs, and the
## share of rows flagged by each bias.* indicator.
data_m1_90_impTS <- tbl_df(ddply(
  data_m1_90_impTS, .(.id), summarise,
  AskOpen    = mean((AskOpen - data_m1$AskOpen)^2),
  AskHigh    = mean((AskHigh - data_m1$AskHigh)^2),
  AskLow     = mean((AskLow - data_m1$AskLow)^2),
  AskClose   = mean((AskClose - data_m1$AskClose)^2),
  # Mean.* average the MSE values computed just above.
  Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open  = sum(bias.open) / length(bias.open),
  bias.high  = sum(bias.high) / length(bias.high),
  bias.low   = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)
))

## Render the summary as a styled, horizontally scrollable HTML table.
kable(data_m1_90_impTS, caption = 'MSE 90% 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 0.0004602 | 0.0005112 | 0.0004253 | 0.0005540 | 0.0004968 | 0.0004877 | 0.5963072 | 0.5367184 | 0.4974122 | 0.6212058 |
kalman | 0.0004602 | 0.0005172 | 0.0004253 | 0.0005541 | 0.0004989 | 0.0004892 | 0.5978459 | 0.5383970 | 0.4965729 | 0.6207861 |
locf | 0.0015472 | 0.0012908 | 0.0012546 | 0.0015022 | 0.0013492 | 0.0013987 | 0.6398098 | 0.5505665 | 0.5153168 | 0.6048398 |
ma | 0.0006192 | 0.0006425 | 0.0005668 | 0.0007449 | 0.0006514 | 0.0006434 | 0.6475031 | 0.5676318 | 0.5312631 | 0.6360330 |
mean | 0.0554369 | 0.0545033 | 0.0548398 | 0.0556674 | 0.0550035 | 0.0551119 | 0.9363547 | 0.9440481 | 0.1286893 | 0.9380333 |
random | 0.0716249 | 0.1366404 | 0.0568678 | 0.0790605 | 0.0908562 | 0.0860484 | 0.1349839 | 0.0870052 | 0.9282417 | 0.9373339 |
以下使用 `Amelia::amelia()` 弥补90%数据缺失值。
## Run amelia() on the 90%-missing data and stack the completed data sets in
## its `imputations` element into one table (ldply adds the .id column).
data_m1_90_amelia <- tbl_df(ldply(amelia(data_m1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 221 222
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 221 222 223 224 225 226 227 228 229 230 231 232
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 201 202 203 204 205 206 207 208 209 210 211
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 221 222 223
## Check whether any missing value is left after imputation.
anyNA(data_m1_90_amelia)
## [1] FALSE
## Flag rows whose imputed Ask prices break the OHLC ordering:
## 1 = inconsistent, 0 = consistent.
data_m1_90_amelia <- mutate(
  data_m1_90_amelia,
  bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
  bias.high  = if_else(
    AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
  ),
  bias.low   = if_else(
    AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
  ),
  bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
)

## Show the rows that carry at least one flag.
dplyr::filter(
  data_m1_90_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 24,833 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2016-10-17 00:00:00 104. 104. 104. 104. 104.
## 2 imp1 2016-10-17 00:01:00 104. 104. 104. 104. 104.
## 3 imp1 2016-10-17 00:02:00 104. 104. 104. 104. 104.
## 4 imp1 2016-10-17 00:04:00 104. 104. 104. 104. 104.
## 5 imp1 2016-10-17 00:05:00 104. 104. 104. 104. 104.
## 6 imp1 2016-10-17 00:08:00 104. 104. 104. 104. 104.
## 7 imp1 2016-10-17 00:09:00 104. 104. 104. 104. 104.
## 8 imp1 2016-10-17 00:10:00 104. 104. 104. 104. 104.
## 9 imp1 2016-10-17 00:13:00 104. 104. 104. 104. 104.
## 10 imp1 2016-10-17 00:14:00 104. 104. 104. 104. 104.
## # ... with 24,823 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
## Reduce each Amelia imputation (.id) to one row: the MSE of every Ask column
## against the complete data_m1 series, the averages of those MSEs, and the
## share of rows flagged by each bias.* indicator.
data_m1_90_amelia <- tbl_df(ddply(
  data_m1_90_amelia, .(.id), summarise,
  AskOpen    = mean((AskOpen - data_m1$AskOpen)^2),
  AskHigh    = mean((AskHigh - data_m1$AskHigh)^2),
  AskLow     = mean((AskLow - data_m1$AskLow)^2),
  AskClose   = mean((AskClose - data_m1$AskClose)^2),
  # Mean.* average the MSE values computed just above.
  Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open  = sum(bias.open) / length(bias.open),
  bias.high  = sum(bias.high) / length(bias.high),
  bias.low   = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)
))

## Render the summary as a styled, horizontally scrollable HTML table.
kable(data_m1_90_amelia, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 0.0461595 | 0.0461769 | 0.0463285 | 0.0462737 | 0.0462597 | 0.0462346 | 0.5740663 | 0.4841237 | 0.4734928 | 0.5550427 |
imp2 | 0.0444710 | 0.0444534 | 0.0444560 | 0.0445331 | 0.0444808 | 0.0444784 | 0.5337809 | 0.4397818 | 0.4294307 | 0.4981116 |
imp3 | 0.0458608 | 0.0460663 | 0.0465595 | 0.0462600 | 0.0462953 | 0.0461867 | 0.5509862 | 0.4834243 | 0.4192195 | 0.5245489 |
imp4 | 0.0462646 | 0.0464083 | 0.0464868 | 0.0463752 | 0.0464234 | 0.0463837 | 0.5572807 | 0.4809064 | 0.4618828 | 0.5704294 |
imp5 | 0.0440889 | 0.0441617 | 0.0444101 | 0.0440200 | 0.0441973 | 0.0441702 | 0.5168555 | 0.4466359 | 0.4127850 | 0.5109806 |
以下使用 `tidyr::fill()` 弥补90%数据缺失值。
## Fill the gaps in two passes: first downwards (fill()'s default direction),
## then upwards.
data_m1_90_tidyr <- data_m1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down'
  ) %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up'
  )

## Check whether any missing value is left after both passes.
anyNA(data_m1_90_tidyr)
## [1] FALSE
## Flag rows whose filled Ask prices break the OHLC ordering:
## 1 = inconsistent, 0 = consistent.
data_m1_90_tidyr <- mutate(
  data_m1_90_tidyr,
  bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
  bias.high  = if_else(
    AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
  ),
  bias.low   = if_else(
    AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
  ),
  bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
)

## Show the rows that carry at least one flag.
dplyr::filter(
  data_m1_90_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 5,503 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-10-17 00:00:00 104. 104. 104. 104. 104. 104.
## 2 2016-10-17 00:01:00 104. 104. 104. 104. 104. 104.
## 3 2016-10-17 00:02:00 104. 104. 104. 104. 104. 104.
## 4 2016-10-17 00:03:00 104. 104. 104. 104. 104. 104.
## 5 2016-10-17 00:04:00 104. 104. 104. 104. 104. 104.
## 6 2016-10-17 00:05:00 104. 104. 104. 104. 104. 104.
## 7 2016-10-17 00:06:00 104. 104. 104. 104. 104. 104.
## 8 2016-10-17 00:07:00 104. 104. 104. 104. 104. 104.
## 9 2016-10-17 00:08:00 104. 104. 104. 104. 104. 104.
## 10 2016-10-17 00:09:00 104. 104. 104. 104. 104. 104.
## # ... with 5,493 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
## Collapse the filled data to one row: the MSE of every Ask column against
## the complete data_m1 series, the averages of those MSEs, and the share of
## rows flagged by each bias.* indicator.
data_m1_90_tidyr <- tbl_df(summarise(
  data_m1_90_tidyr,
  AskOpen    = mean((AskOpen - data_m1$AskOpen)^2),
  AskHigh    = mean((AskHigh - data_m1$AskHigh)^2),
  AskLow     = mean((AskLow - data_m1$AskLow)^2),
  AskClose   = mean((AskClose - data_m1$AskClose)^2),
  # Mean.* average the MSE values computed just above.
  Mean.HLC   = (AskHigh + AskLow + AskClose) / 3,
  Mean.OHLC  = (AskOpen + AskHigh + AskLow + AskClose) / 4,
  bias.open  = sum(bias.open) / length(bias.open),
  bias.high  = sum(bias.high) / length(bias.high),
  bias.low   = sum(bias.low) / length(bias.low),
  bias.close = sum(bias.close) / length(bias.close)
))

## Render the summary as a styled, horizontally scrollable HTML table.
kable(data_m1_90_tidyr, caption = 'MSE') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
0.0015472 | 0.0012908 | 0.0012546 | 0.0015022 | 0.0013492 | 0.0013987 | 0.6398098 | 0.5505665 | 0.5153168 | 0.6048398 |
以下使用 `imputeTS::na.seadec()` 弥补90%数据缺失值。
## Build the test set: take the eight Bid/Ask OHLC columns of data_tm1, let
## prodNA(noNA = 0.9) introduce the missing values, then put the first column
## of data_tm1 back in front.
data_tm1_NA <- dplyr::select(
  data_tm1,
  BidOpen, BidHigh, BidLow, BidClose,
  AskOpen, AskHigh, AskLow, AskClose
) %>%
  prodNA(noNA = 0.9) %>%
  cbind(data_tm1[1], .) %>%
  tbl_df

## Impute every price column with na.seadec(), once per entry of `algo`.
data_tm1_90_impTS <- llply(algo, function(method) {
  price_cols <- dplyr::select(
    data_tm1_NA, starts_with('Ask'), starts_with('Bid')
  )
  as.tibble(map(price_cols, na.seadec, algorithm = method))
})
names(data_tm1_90_impTS) <- algo

## Stack the per-algorithm results into one table (ldply adds the .id column).
data_tm1_90_impTS <- tbl_df(ldply(data_tm1_90_impTS))

## Flag rows whose imputed Ask prices break the OHLC ordering:
## 1 = inconsistent, 0 = consistent.
data_tm1_90_impTS <- mutate(
  data_tm1_90_impTS,
  bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
  bias.high  = if_else(
    AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
  ),
  bias.low   = if_else(
    AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
  ),
  bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
)

## Show the rows that carry at least one flag.
dplyr::filter(
  data_tm1_90_impTS,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 6,763 x 13
## .id AskOpen AskHigh AskLow AskClose BidOpen BidHigh BidLow BidClose
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 2 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 3 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 4 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 5 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 6 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 7 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 8 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 9 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## 10 inte~ 111. 111. 111. 111. 111. 111. 111. 111.
## # ... with 6,753 more rows, and 4 more variables: bias.open <dbl>,
## # bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
## Reduce each imputation method (.id) to one row: the MSE of every Ask column
## against the complete series, the averages of those MSEs, and the share of
## rows flagged by each bias.* indicator.
##
## The reference series must be data_tm1: data_tm1_90_impTS was imputed from
## data_tm1_NA, which was built from data_tm1. Comparing against data_m1 (a
## different series) measures the gap between the two price series instead of
## the imputation error.
## NOTE(review): the table rendered beneath this chunk (values around 48) was
## produced by the old comparison against data_m1; re-knit to refresh it.
data_tm1_90_impTS %<>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        # Mean.* average the MSE values computed just above.
        Mean.HLC = (AskHigh + AskLow + AskClose)/3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
        bias.open = sum(bias.open)/length(bias.open),
        bias.high = sum(bias.high)/length(bias.high),
        bias.low = sum(bias.low)/length(bias.low),
        bias.close = sum(bias.close)/length(bias.close)) %>% tbl_df

## Render the summary as a styled, horizontally scrollable HTML table.
data_tm1_90_impTS %>%
  kable(caption = 'MSE 90% 缺失值') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')#, height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
interpolation | 48.09238 | 48.02948 | 48.08101 | 48.06488 | 48.05846 | 48.06694 | 0.5270270 | 0.5165848 | 0.4600737 | 0.6339066 |
kalman | 48.09188 | 48.03086 | 48.08101 | 48.06581 | 48.05923 | 48.06739 | 0.5221130 | 0.5079853 | 0.4459459 | 0.6216216 |
locf | 48.09342 | 48.02838 | 48.08001 | 48.02318 | 48.04386 | 48.05625 | 0.5368550 | 0.5018428 | 0.5122850 | 0.6375921 |
ma | 48.09193 | 48.03144 | 48.08093 | 48.06593 | 48.05943 | 48.06756 | 0.5737101 | 0.5110565 | 0.4944717 | 0.6228501 |
mean | 48.03712 | 48.06868 | 48.06397 | 48.17040 | 48.10102 | 48.08504 | 0.1572482 | 0.1547912 | 0.1480344 | 0.1658477 |
random | 48.11770 | 47.55666 | 47.88066 | 47.62295 | 47.68676 | 47.79449 | 0.9410319 | 0.9367322 | 0.9613022 | 0.9637592 |
以下使用 `Amelia::amelia()` 弥补90%数据缺失值。
## Run amelia() on the 90%-missing data and stack the completed data sets in
## its `imputations` element into one table (ldply adds the .id column).
data_tm1_90_amelia <- tbl_df(ldply(amelia(data_tm1_NA)$imputations))
## -- Imputation 1 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
## 261
##
## -- Imputation 2 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
## 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
## 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
## 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
## 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
## 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
##
## -- Imputation 3 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
## 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
## 281 282 283 284 285 286 287 288 289
##
## -- Imputation 4 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
## 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
##
##
## -- Imputation 5 --
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
## 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
## 261
## Check whether any missing value is left after imputation.
anyNA(data_tm1_90_amelia)
## [1] FALSE
## Flag rows whose imputed Ask prices break the OHLC ordering:
## 1 = inconsistent, 0 = consistent.
data_tm1_90_amelia <- mutate(
  data_tm1_90_amelia,
  bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
  bias.high  = if_else(
    AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
  ),
  bias.low   = if_else(
    AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
  ),
  bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
)

## Show the rows that carry at least one flag.
dplyr::filter(
  data_tm1_90_amelia,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 4,291 x 14
## .id index BidOpen BidHigh BidLow BidClose AskOpen
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 imp1 2017-08-07 00:02:00 111. 111. 111. 111. 111.
## 2 imp1 2017-08-07 00:07:00 111. 111. 111. 111. 111.
## 3 imp1 2017-08-07 00:08:00 111. 111. 111. 111. 111.
## 4 imp1 2017-08-07 00:09:00 111. 111. 111. 111. 111.
## 5 imp1 2017-08-07 00:22:00 111. 111. 111. 111. 111.
## 6 imp1 2017-08-07 00:42:00 111. 111. 111. 111. 111.
## 7 imp1 2017-08-07 00:44:00 111. 111. 111. 111. 111.
## 8 imp1 2017-08-07 00:45:00 111. 111. 111. 111. 111.
## 9 imp1 2017-08-07 00:55:00 111. 111. 111. 111. 111.
## 10 imp1 2017-08-07 00:57:00 111. 111. 111. 111. 111.
## # ... with 4,281 more rows, and 7 more variables: AskHigh <dbl>,
## # AskLow <dbl>, AskClose <dbl>, bias.open <dbl>, bias.high <dbl>,
## # bias.low <dbl>, bias.close <dbl>
## Reduce each Amelia imputation (.id) to one row: the MSE of every Ask column
## against the complete series, the averages of those MSEs, and the share of
## rows flagged by each bias.* indicator.
##
## The reference series must be data_tm1: data_tm1_90_amelia was imputed from
## data_tm1_NA, which was built from data_tm1. Comparing against data_m1 (a
## different series) measures the gap between the two price series instead of
## the imputation error.
## NOTE(review): the table rendered beneath this chunk (values around 48) was
## produced by the old comparison against data_m1; re-knit to refresh it.
data_tm1_90_amelia %<>%
  ddply(.(.id), summarise,
        AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
        AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
        AskLow = mean((AskLow - data_tm1$AskLow)^2),
        AskClose = mean((AskClose - data_tm1$AskClose)^2),
        # Mean.* average the MSE values computed just above.
        Mean.HLC = (AskHigh + AskLow + AskClose)/3,
        Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
        bias.open = sum(bias.open)/length(bias.open),
        bias.high = sum(bias.high)/length(bias.high),
        bias.low = sum(bias.low)/length(bias.low),
        bias.close = sum(bias.close)/length(bias.close)) %>% tbl_df

## Render the summary as a styled, horizontally scrollable HTML table.
data_tm1_90_amelia %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')#, height = '400px')
.id | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|---|
imp1 | 48.08072 | 48.03486 | 48.07313 | 48.05499 | 48.05433 | 48.06093 | 0.4189189 | 0.3409091 | 0.3114251 | 0.3544226 |
imp2 | 48.10489 | 48.05921 | 48.08469 | 48.07178 | 48.07189 | 48.08014 | 0.4600737 | 0.3789926 | 0.2911548 | 0.3568796 |
imp3 | 48.07160 | 48.04393 | 48.06767 | 48.04916 | 48.05359 | 48.05809 | 0.3452088 | 0.3132678 | 0.2463145 | 0.3003686 |
imp4 | 48.10228 | 48.04703 | 48.07421 | 48.07541 | 48.06555 | 48.07473 | 0.4576167 | 0.3851351 | 0.2585995 | 0.3538084 |
imp5 | 48.06921 | 48.04484 | 48.06735 | 48.03865 | 48.05028 | 48.05501 | 0.4299754 | 0.2929975 | 0.3108108 | 0.3882064 |
以下使用 `tidyr::fill()` 弥补90%数据缺失值。
## Fill the gaps in two passes: first downwards (fill()'s default direction),
## then upwards.
data_tm1_90_tidyr <- data_tm1_NA %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'down'
  ) %>%
  fill(
    BidOpen, BidHigh, BidLow, BidClose,
    AskOpen, AskHigh, AskLow, AskClose,
    .direction = 'up'
  )

## Check whether any missing value is left after both passes.
anyNA(data_tm1_90_tidyr)
## [1] FALSE
## Flag rows whose filled Ask prices break the OHLC ordering:
## 1 = inconsistent, 0 = consistent.
data_tm1_90_tidyr <- mutate(
  data_tm1_90_tidyr,
  bias.open  = if_else(AskOpen > AskHigh | AskOpen < AskLow, 1, 0),
  bias.high  = if_else(
    AskHigh < AskOpen | AskHigh < AskLow | AskHigh < AskClose, 1, 0
  ),
  bias.low   = if_else(
    AskLow > AskOpen | AskLow > AskHigh | AskLow > AskClose, 1, 0
  ),
  bias.close = if_else(AskClose > AskHigh | AskClose < AskLow, 1, 0)
)

## Show the rows that carry at least one flag.
dplyr::filter(
  data_tm1_90_tidyr,
  bias.open == 1 | bias.high == 1 | bias.low == 1 | bias.close == 1
)
## # A tibble: 1,183 x 13
## index BidOpen BidHigh BidLow BidClose AskOpen AskHigh
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-08-07 00:18:00 111. 111. 111. 111. 111. 111.
## 2 2017-08-07 00:22:00 111. 111. 111. 111. 111. 111.
## 3 2017-08-07 00:24:00 111. 111. 111. 111. 111. 111.
## 4 2017-08-07 00:38:00 111. 111. 111. 111. 111. 111.
## 5 2017-08-07 00:39:00 111. 111. 111. 111. 111. 111.
## 6 2017-08-07 00:40:00 111. 111. 111. 111. 111. 111.
## 7 2017-08-07 00:41:00 111. 111. 111. 111. 111. 111.
## 8 2017-08-07 00:42:00 111. 111. 111. 111. 111. 111.
## 9 2017-08-07 00:43:00 111. 111. 111. 111. 111. 111.
## 10 2017-08-07 00:44:00 111. 111. 111. 111. 111. 111.
## # ... with 1,173 more rows, and 6 more variables: AskLow <dbl>,
## # AskClose <dbl>, bias.open <dbl>, bias.high <dbl>, bias.low <dbl>,
## # bias.close <dbl>
## Collapse the filled data to one row: the MSE of every Ask column against
## the complete series, the averages of those MSEs, and the share of rows
## flagged by each bias.* indicator.
##
## The reference series must be data_tm1: data_tm1_90_tidyr was filled from
## data_tm1_NA, which was built from data_tm1. Comparing against data_m1 (a
## different series) measures the gap between the two price series instead of
## the imputation error.
## NOTE(review): the table rendered beneath this chunk (values around 48) was
## produced by the old comparison against data_m1; re-knit to refresh it.
data_tm1_90_tidyr %<>%
  summarise(
    AskOpen = mean((AskOpen - data_tm1$AskOpen)^2),
    AskHigh = mean((AskHigh - data_tm1$AskHigh)^2),
    AskLow = mean((AskLow - data_tm1$AskLow)^2),
    AskClose = mean((AskClose - data_tm1$AskClose)^2),
    # Mean.* average the MSE values computed just above.
    Mean.HLC = (AskHigh + AskLow + AskClose)/3,
    Mean.OHLC = (AskOpen + AskHigh + AskLow + AskClose)/4,
    bias.open = sum(bias.open)/length(bias.open),
    bias.high = sum(bias.high)/length(bias.high),
    bias.low = sum(bias.low)/length(bias.low),
    bias.close = sum(bias.close)/length(bias.close)) %>% tbl_df

## Render the summary as a styled, horizontally scrollable HTML table.
data_tm1_90_tidyr %>%
  kable(caption = 'MSE') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  scroll_box(width = '100%')#, height = '400px')
AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close |
---|---|---|---|---|---|---|---|---|---|
48.09342 | 48.02838 | 48.08001 | 48.02318 | 48.04386 | 48.05625 | 0.536855 | 0.5018428 | 0.512285 | 0.6375921 |
以下比较日内数据在各指定缺失值比例下,经弥补后的误差与偏差有多高。
## Gather the per-level imputeTS summaries, keyed by the share of missing
## values, for data_m1 (dfm1_impTS) and data_tm1 (dfm2_impTS).
dfm1_impTS <- list(
  '1%'  = data_m1_1_impTS,
  '10%' = data_m1_10_impTS,
  '20%' = data_m1_20_impTS,
  '30%' = data_m1_30_impTS,
  '50%' = data_m1_50_impTS,
  '65%' = data_m1_65_impTS,
  '70%' = data_m1_70_impTS,
  '80%' = data_m1_80_impTS,
  '85%' = data_m1_85_impTS,
  '90%' = data_m1_90_impTS
)
dfm2_impTS <- list(
  '1%'  = data_tm1_1_impTS,
  '10%' = data_tm1_10_impTS,
  '20%' = data_tm1_20_impTS,
  '30%' = data_tm1_30_impTS,
  '50%' = data_tm1_50_impTS,
  '65%' = data_tm1_65_impTS,
  '70%' = data_tm1_70_impTS,
  '80%' = data_tm1_80_impTS,
  '85%' = data_tm1_85_impTS,
  '90%' = data_tm1_90_impTS
)

## Summarise
## Each table's own .id (the imputation method) is renamed to Model, because
## ldply() adds a new .id column holding the list names (the missing-value
## level). `bias` is the mean of the four bias.* rates; rows are then sorted
## by Mean.HLC, Mean.OHLC and bias, ascending.
dfm1_impTS <- dfm1_impTS %>%
  ldply(function(tbl) dplyr::rename(tbl, Model = .id)) %>%
  tbl_df %>%
  mutate(bias = (bias.open + bias.high + bias.low + bias.close) / 4) %>%
  arrange(Mean.HLC, Mean.OHLC, bias)

dfm2_impTS <- dfm2_impTS %>%
  ldply(function(tbl) dplyr::rename(tbl, Model = .id)) %>%
  tbl_df %>%
  mutate(bias = (bias.open + bias.high + bias.low + bias.close) / 4) %>%
  arrange(Mean.HLC, Mean.OHLC, bias)

## Render the data_m1 comparison as a styled table inside a scroll box
## limited to 400px in height.
kable(dfm1_impTS, caption = 'MSE 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%', height = '400px')
.id | Model | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close | bias |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1% | kalman | 0.0000006 | 0.0000008 | 0.0000009 | 0.0000013 | 0.0000010 | 0.0000009 | 0.0050357 | 0.0064345 | 0.0050357 | 0.0064345 | 0.0057351 |
1% | interpolation | 0.0000006 | 0.0000008 | 0.0000009 | 0.0000013 | 0.0000010 | 0.0000009 | 0.0048958 | 0.0062946 | 0.0048958 | 0.0062946 | 0.0055952 |
1% | ma | 0.0000007 | 0.0000014 | 0.0000012 | 0.0000013 | 0.0000013 | 0.0000012 | 0.0079731 | 0.0088124 | 0.0067142 | 0.0086725 | 0.0080431 |
1% | locf | 0.0000014 | 0.0000031 | 0.0000018 | 0.0000036 | 0.0000029 | 0.0000025 | 0.0037768 | 0.0053154 | 0.0043363 | 0.0058749 | 0.0048258 |
10% | interpolation | 0.0000090 | 0.0000071 | 0.0000075 | 0.0000095 | 0.0000080 | 0.0000083 | 0.0495174 | 0.0479787 | 0.0492377 | 0.0496573 | 0.0490978 |
10% | kalman | 0.0000090 | 0.0000071 | 0.0000075 | 0.0000096 | 0.0000081 | 0.0000083 | 0.0513359 | 0.0500769 | 0.0509162 | 0.0517555 | 0.0510211 |
10% | ma | 0.0000108 | 0.0000100 | 0.0000107 | 0.0000129 | 0.0000112 | 0.0000111 | 0.0672821 | 0.0636453 | 0.0614072 | 0.0665827 | 0.0647293 |
20% | interpolation | 0.0000229 | 0.0000146 | 0.0000154 | 0.0000201 | 0.0000167 | 0.0000182 | 0.1054693 | 0.0988950 | 0.0949783 | 0.0948384 | 0.0985453 |
20% | kalman | 0.0000229 | 0.0000146 | 0.0000154 | 0.0000202 | 0.0000167 | 0.0000183 | 0.1088264 | 0.1022521 | 0.0976360 | 0.0986152 | 0.1018324 |
10% | locf | 0.0000190 | 0.0000160 | 0.0000167 | 0.0000208 | 0.0000178 | 0.0000181 | 0.0530144 | 0.0507763 | 0.0471395 | 0.0464401 | 0.0493426 |
20% | ma | 0.0000275 | 0.0000192 | 0.0000198 | 0.0000245 | 0.0000212 | 0.0000228 | 0.1292488 | 0.1209959 | 0.1112044 | 0.1222549 | 0.1209260 |
30% | interpolation | 0.0000374 | 0.0000247 | 0.0000282 | 0.0000342 | 0.0000290 | 0.0000311 | 0.1573647 | 0.1435166 | 0.1454749 | 0.1502308 | 0.1491467 |
30% | kalman | 0.0000373 | 0.0000247 | 0.0000282 | 0.0000344 | 0.0000291 | 0.0000312 | 0.1603021 | 0.1458945 | 0.1481326 | 0.1537278 | 0.1520143 |
30% | ma | 0.0000425 | 0.0000321 | 0.0000351 | 0.0000407 | 0.0000359 | 0.0000376 | 0.1805847 | 0.1612813 | 0.1653378 | 0.1772276 | 0.1711078 |
20% | locf | 0.0000526 | 0.0000327 | 0.0000319 | 0.0000438 | 0.0000362 | 0.0000403 | 0.1105050 | 0.0969366 | 0.0981956 | 0.0916212 | 0.0993146 |
30% | locf | 0.0000830 | 0.0000646 | 0.0000672 | 0.0000785 | 0.0000701 | 0.0000733 | 0.1658973 | 0.1559659 | 0.1465939 | 0.1577843 | 0.1565604 |
50% | interpolation | 0.0000704 | 0.0000582 | 0.0000780 | 0.0000750 | 0.0000704 | 0.0000704 | 0.2460484 | 0.2282837 | 0.2087005 | 0.2559799 | 0.2347531 |
50% | kalman | 0.0000704 | 0.0000582 | 0.0000783 | 0.0000750 | 0.0000705 | 0.0000705 | 0.2530424 | 0.2322003 | 0.2149951 | 0.2632536 | 0.2408728 |
50% | ma | 0.0000804 | 0.0000680 | 0.0001001 | 0.0000872 | 0.0000851 | 0.0000839 | 0.2808784 | 0.2552805 | 0.2345783 | 0.2899706 | 0.2651769 |
65% | kalman | 0.0001282 | 0.0001238 | 0.0001133 | 0.0001202 | 0.0001191 | 0.0001214 | 0.3674640 | 0.3241013 | 0.2845153 | 0.3415862 | 0.3294167 |
65% | interpolation | 0.0001279 | 0.0001238 | 0.0001136 | 0.0001206 | 0.0001193 | 0.0001215 | 0.3662051 | 0.3241013 | 0.2831165 | 0.3394880 | 0.3282277 |
65% | ma | 0.0001512 | 0.0001488 | 0.0001313 | 0.0001449 | 0.0001417 | 0.0001441 | 0.3813121 | 0.3329137 | 0.3022800 | 0.3621486 | 0.3446636 |
70% | kalman | 0.0001618 | 0.0001265 | 0.0001262 | 0.0001725 | 0.0001417 | 0.0001468 | 0.3959994 | 0.3457826 | 0.3231221 | 0.3883061 | 0.3633026 |
70% | interpolation | 0.0001618 | 0.0001265 | 0.0001262 | 0.0001725 | 0.0001417 | 0.0001468 | 0.3927822 | 0.3453630 | 0.3225626 | 0.3881662 | 0.3622185 |
50% | locf | 0.0001652 | 0.0001420 | 0.0001480 | 0.0001916 | 0.0001606 | 0.0001617 | 0.2764023 | 0.2480067 | 0.2397538 | 0.2771017 | 0.2603161 |
70% | ma | 0.0001928 | 0.0001537 | 0.0001548 | 0.0001982 | 0.0001689 | 0.0001749 | 0.4151630 | 0.3650860 | 0.3400476 | 0.4130648 | 0.3833403 |
80% | interpolation | 0.0002653 | 0.0002080 | 0.0002403 | 0.0002507 | 0.0002330 | 0.0002411 | 0.4865016 | 0.4063505 | 0.4062107 | 0.4692964 | 0.4420898 |
80% | kalman | 0.0002647 | 0.0002080 | 0.0002403 | 0.0002511 | 0.0002331 | 0.0002410 | 0.4876206 | 0.4055113 | 0.4049517 | 0.4662191 | 0.4410757 |
65% | locf | 0.0002955 | 0.0003304 | 0.0002383 | 0.0002986 | 0.0002891 | 0.0002907 | 0.3817317 | 0.3404672 | 0.3154287 | 0.3776752 | 0.3538257 |
80% | ma | 0.0003359 | 0.0002728 | 0.0002943 | 0.0003105 | 0.0002925 | 0.0003034 | 0.5045461 | 0.4288712 | 0.4227165 | 0.4869212 | 0.4607637 |
85% | kalman | 0.0003567 | 0.0002414 | 0.0003399 | 0.0003848 | 0.0003220 | 0.0003307 | 0.5336411 | 0.4581060 | 0.4311092 | 0.5150371 | 0.4844734 |
85% | interpolation | 0.0003568 | 0.0002415 | 0.0003425 | 0.0003855 | 0.0003232 | 0.0003316 | 0.5390964 | 0.4617429 | 0.4374038 | 0.5209120 | 0.4897888 |
70% | locf | 0.0003904 | 0.0003374 | 0.0003273 | 0.0004098 | 0.0003581 | 0.0003662 | 0.4253742 | 0.3855085 | 0.3597706 | 0.4227165 | 0.3983424 |
85% | ma | 0.0004343 | 0.0003234 | 0.0004471 | 0.0004450 | 0.0004052 | 0.0004124 | 0.5718282 | 0.4860820 | 0.4788082 | 0.5623164 | 0.5247587 |
90% | interpolation | 0.0004602 | 0.0005112 | 0.0004253 | 0.0005540 | 0.0004968 | 0.0004877 | 0.5963072 | 0.5367184 | 0.4974122 | 0.6212058 | 0.5629109 |
90% | kalman | 0.0004602 | 0.0005172 | 0.0004253 | 0.0005541 | 0.0004989 | 0.0004892 | 0.5978459 | 0.5383970 | 0.4965729 | 0.6207861 | 0.5634005 |
1% | mean | 0.0005507 | 0.0005905 | 0.0004618 | 0.0006969 | 0.0005831 | 0.0005750 | 0.0194433 | 0.0179046 | 0.0205623 | 0.0194433 | 0.0193384 |
80% | locf | 0.0007388 | 0.0005612 | 0.0005608 | 0.0006582 | 0.0005934 | 0.0006297 | 0.5302840 | 0.4497132 | 0.4574066 | 0.5218912 | 0.4898238 |
90% | ma | 0.0006192 | 0.0006425 | 0.0005668 | 0.0007449 | 0.0006514 | 0.0006434 | 0.6475031 | 0.5676318 | 0.5312631 | 0.6360330 | 0.5956078 |
85% | locf | 0.0009119 | 0.0007365 | 0.0009503 | 0.0008236 | 0.0008368 | 0.0008556 | 0.5592391 | 0.4825850 | 0.4667786 | 0.5409148 | 0.5123794 |
1% | random | 0.0024076 | 0.0006038 | 0.0023222 | 0.0008296 | 0.0012519 | 0.0015408 | 0.0145475 | 0.0093719 | 0.0195832 | 0.0145475 | 0.0145125 |
90% | locf | 0.0015472 | 0.0012908 | 0.0012546 | 0.0015022 | 0.0013492 | 0.0013987 | 0.6398098 | 0.5505665 | 0.5153168 | 0.6048398 | 0.5776332 |
10% | mean | 0.0057660 | 0.0063513 | 0.0060217 | 0.0066473 | 0.0063401 | 0.0061966 | 0.1719122 | 0.1493915 | 0.1814240 | 0.1751294 | 0.1694643 |
20% | mean | 0.0123887 | 0.0117783 | 0.0116303 | 0.0126437 | 0.0120174 | 0.0121103 | 0.3066163 | 0.2652119 | 0.2957057 | 0.3014408 | 0.2922437 |
30% | mean | 0.0190881 | 0.0174648 | 0.0182534 | 0.0190274 | 0.0182485 | 0.0184584 | 0.4018744 | 0.3536159 | 0.3729193 | 0.4076095 | 0.3840048 |
10% | random | 0.0077868 | 0.0226350 | 0.0087245 | 0.0291799 | 0.0201798 | 0.0170816 | 0.1092460 | 0.0870052 | 0.1375017 | 0.1214156 | 0.1137921 |
50% | mean | 0.0306066 | 0.0306853 | 0.0305917 | 0.0302918 | 0.0305230 | 0.0305439 | 0.4684571 | 0.4066303 | 0.4129249 | 0.4671982 | 0.4388026 |
20% | random | 0.0308892 | 0.0318163 | 0.0327874 | 0.0353737 | 0.0333258 | 0.0327167 | 0.3551546 | 0.3561337 | 0.3119317 | 0.3477409 | 0.3427402 |
65% | mean | 0.0400602 | 0.0406286 | 0.0391629 | 0.0395314 | 0.0397743 | 0.0398458 | 0.4402014 | 0.3983774 | 0.3761365 | 0.4364247 | 0.4127850 |
70% | mean | 0.0434017 | 0.0423406 | 0.0426192 | 0.0426670 | 0.0425423 | 0.0427571 | 0.4084487 | 0.3775353 | 0.3564135 | 0.4071898 | 0.3873968 |
80% | mean | 0.0486525 | 0.0484767 | 0.0482804 | 0.0487259 | 0.0484943 | 0.0485339 | 0.2989229 | 0.2805987 | 0.2605959 | 0.2943069 | 0.2836061 |
50% | random | 0.0401562 | 0.0407120 | 0.0546502 | 0.0518731 | 0.0490784 | 0.0468479 | 0.8181564 | 0.7682193 | 0.7954959 | 0.7911596 | 0.7932578 |
85% | mean | 0.0521145 | 0.0515481 | 0.0514613 | 0.0510225 | 0.0513439 | 0.0515366 | 0.2410127 | 0.2432508 | 0.2106588 | 0.2477270 | 0.2356623 |
90% | mean | 0.0554369 | 0.0545033 | 0.0548398 | 0.0556674 | 0.0550035 | 0.0551119 | 0.9363547 | 0.9440481 | 0.1286893 | 0.9380333 | 0.7367814 |
85% | random | 0.3240766 | 0.0639116 | 0.0581029 | 0.0510604 | 0.0576916 | 0.1242879 | 0.9664289 | 0.1843614 | 0.9805567 | 0.9148133 | 0.7615401 |
30% | random | 0.0891399 | 0.0503076 | 0.0991078 | 0.0534894 | 0.0676349 | 0.0730112 | 0.4879004 | 0.5048258 | 0.3189257 | 0.3983774 | 0.4275073 |
80% | random | 0.0967558 | 0.0565147 | 0.1428038 | 0.0580634 | 0.0857940 | 0.0885344 | 0.8461323 | 0.9290810 | 0.1155406 | 0.8341027 | 0.6812142 |
90% | random | 0.0716249 | 0.1366404 | 0.0568678 | 0.0790605 | 0.0908562 | 0.0860484 | 0.1349839 | 0.0870052 | 0.9282417 | 0.9373339 | 0.5218912 |
65% | random | 0.0533171 | 0.0806651 | 0.1361035 | 0.1684688 | 0.1284125 | 0.1096386 | 0.2796195 | 0.2432508 | 0.6848510 | 0.6998182 | 0.4768849 |
70% | random | 0.1551202 | 0.0523911 | 0.1117988 | 0.2454679 | 0.1365526 | 0.1411945 | 0.8742481 | 0.8708910 | 0.7637432 | 0.8887956 | 0.8494195 |
## Print the `dfm2_impTS` summary as a styled table inside a scrollable box
## (full width, 400px tall).
## NOTE(review): the values printed for this table are around 48, while the
## caption is the same 'MSE' caption used elsewhere -- confirm it fits.
dfm2_impTS %>%
  kable(caption = 'MSE 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%', height = '400px')
.id | Model | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close | bias |
---|---|---|---|---|---|---|---|---|---|---|---|---|
90% | random | 48.11770 | 47.55666 | 47.88066 | 47.62295 | 47.68676 | 47.79449 | 0.9410319 | 0.9367322 | 0.9613022 | 0.9637592 | 0.9507064 |
50% | random | 48.61875 | 48.18569 | 47.83183 | 47.39597 | 47.80450 | 48.00806 | 0.6044226 | 0.5761671 | 0.5909091 | 0.6173219 | 0.5972052 |
20% | random | 48.45310 | 48.01260 | 48.05931 | 47.86129 | 47.97773 | 48.09657 | 0.3366093 | 0.3218673 | 0.3126536 | 0.3359951 | 0.3267813 |
90% | locf | 48.09342 | 48.02838 | 48.08001 | 48.02318 | 48.04386 | 48.05625 | 0.5368550 | 0.5018428 | 0.5122850 | 0.6375921 | 0.5471437 |
70% | mean | 48.12663 | 48.00807 | 48.08176 | 48.06426 | 48.05137 | 48.07018 | 0.8335381 | 0.7665848 | 0.3243243 | 0.3863636 | 0.5777027 |
30% | random | 48.25972 | 48.06207 | 48.33286 | 47.77778 | 48.05757 | 48.10811 | 0.5165848 | 0.4864865 | 0.5558968 | 0.5423833 | 0.5253378 |
90% | interpolation | 48.09238 | 48.02948 | 48.08101 | 48.06488 | 48.05846 | 48.06694 | 0.5270270 | 0.5165848 | 0.4600737 | 0.6339066 | 0.5343980 |
90% | kalman | 48.09188 | 48.03086 | 48.08101 | 48.06581 | 48.05923 | 48.06739 | 0.5221130 | 0.5079853 | 0.4459459 | 0.6216216 | 0.5244165 |
90% | ma | 48.09193 | 48.03144 | 48.08093 | 48.06593 | 48.05943 | 48.06756 | 0.5737101 | 0.5110565 | 0.4944717 | 0.6228501 | 0.5505221 |
85% | locf | 48.06139 | 48.04274 | 48.09568 | 48.04604 | 48.06149 | 48.06146 | 0.5479115 | 0.4570025 | 0.5012285 | 0.5491400 | 0.5138206 |
80% | locf | 48.07033 | 48.03224 | 48.09007 | 48.06271 | 48.06167 | 48.06384 | 0.4932432 | 0.4404177 | 0.3912776 | 0.4551597 | 0.4450246 |
85% | kalman | 48.05593 | 48.04052 | 48.08479 | 48.06086 | 48.06206 | 48.06052 | 0.4404177 | 0.3593366 | 0.3875921 | 0.4484029 | 0.4089373 |
85% | interpolation | 48.05584 | 48.04052 | 48.08479 | 48.06113 | 48.06214 | 48.06057 | 0.4410319 | 0.3660934 | 0.3900491 | 0.4606880 | 0.4144656 |
85% | ma | 48.05618 | 48.04110 | 48.08498 | 48.06091 | 48.06233 | 48.06079 | 0.5116708 | 0.4183047 | 0.4551597 | 0.5098280 | 0.4737408 |
70% | locf | 48.05579 | 48.03106 | 48.07434 | 48.08234 | 48.06258 | 48.06088 | 0.4029484 | 0.4004914 | 0.3003686 | 0.4054054 | 0.3773034 |
70% | ma | 48.05462 | 48.02955 | 48.08596 | 48.07753 | 48.06435 | 48.06191 | 0.3617936 | 0.3445946 | 0.2856265 | 0.3703931 | 0.3406020 |
70% | kalman | 48.05440 | 48.02955 | 48.08650 | 48.07724 | 48.06443 | 48.06192 | 0.3415233 | 0.3212531 | 0.2653563 | 0.3445946 | 0.3181818 |
70% | interpolation | 48.05448 | 48.02946 | 48.08666 | 48.07724 | 48.06445 | 48.06196 | 0.3421376 | 0.3243243 | 0.2616708 | 0.3452088 | 0.3183354 |
50% | locf | 48.06433 | 48.03798 | 48.09349 | 48.06476 | 48.06541 | 48.06514 | 0.2807125 | 0.2444717 | 0.2242015 | 0.2585995 | 0.2519963 |
50% | kalman | 48.06676 | 48.03676 | 48.09347 | 48.06627 | 48.06550 | 48.06581 | 0.2340295 | 0.2168305 | 0.1848894 | 0.2315725 | 0.2168305 |
50% | interpolation | 48.06646 | 48.03692 | 48.09347 | 48.06623 | 48.06554 | 48.06577 | 0.2223587 | 0.2094595 | 0.1799754 | 0.2260442 | 0.2094595 |
50% | ma | 48.06724 | 48.03628 | 48.09353 | 48.06685 | 48.06555 | 48.06597 | 0.2469287 | 0.2266585 | 0.1996314 | 0.2432432 | 0.2291155 |
1% | locf | 48.06471 | 48.03943 | 48.09250 | 48.06706 | 48.06633 | 48.06592 | 0.0079853 | 0.0067568 | 0.0030713 | 0.0024570 | 0.0050676 |
65% | locf | 48.06478 | 48.04620 | 48.08847 | 48.06434 | 48.06634 | 48.06595 | 0.3544226 | 0.3138821 | 0.2942260 | 0.3445946 | 0.3267813 |
10% | ma | 48.06389 | 48.03796 | 48.09338 | 48.06803 | 48.06646 | 48.06581 | 0.0626536 | 0.0540541 | 0.0540541 | 0.0552826 | 0.0565111 |
1% | kalman | 48.06484 | 48.03953 | 48.09285 | 48.06702 | 48.06646 | 48.06606 | 0.0030713 | 0.0024570 | 0.0030713 | 0.0024570 | 0.0027641 |
1% | interpolation | 48.06478 | 48.03953 | 48.09285 | 48.06702 | 48.06647 | 48.06605 | 0.0024570 | 0.0012285 | 0.0030713 | 0.0018428 | 0.0021499 |
1% | ma | 48.06507 | 48.03947 | 48.09286 | 48.06706 | 48.06647 | 48.06612 | 0.0055283 | 0.0055283 | 0.0042998 | 0.0055283 | 0.0052211 |
10% | kalman | 48.06387 | 48.03828 | 48.09319 | 48.06821 | 48.06656 | 48.06589 | 0.0528256 | 0.0491400 | 0.0448403 | 0.0448403 | 0.0479115 |
1% | mean | 48.06836 | 48.03966 | 48.09137 | 48.06870 | 48.06658 | 48.06702 | 0.0165848 | 0.0184275 | 0.0128993 | 0.0159705 | 0.0159705 |
10% | interpolation | 48.06382 | 48.03829 | 48.09319 | 48.06827 | 48.06658 | 48.06589 | 0.0546683 | 0.0485258 | 0.0454545 | 0.0423833 | 0.0477580 |
20% | ma | 48.06454 | 48.03813 | 48.09355 | 48.06860 | 48.06676 | 48.06620 | 0.1087224 | 0.1068796 | 0.0939803 | 0.1081081 | 0.1044226 |
20% | locf | 48.06368 | 48.03972 | 48.09354 | 48.06721 | 48.06683 | 48.06604 | 0.1068796 | 0.0976658 | 0.0970516 | 0.0952088 | 0.0992015 |
30% | interpolation | 48.06474 | 48.03922 | 48.09377 | 48.06792 | 48.06697 | 48.06641 | 0.1461916 | 0.1351351 | 0.1332924 | 0.1461916 | 0.1402027 |
30% | kalman | 48.06447 | 48.03942 | 48.09377 | 48.06791 | 48.06703 | 48.06639 | 0.1529484 | 0.1418919 | 0.1363636 | 0.1541769 | 0.1463452 |
20% | kalman | 48.06438 | 48.03886 | 48.09359 | 48.06910 | 48.06718 | 48.06648 | 0.0927518 | 0.0945946 | 0.0816953 | 0.0933661 | 0.0906020 |
1% | random | 48.06803 | 48.03820 | 48.10323 | 48.06020 | 48.06721 | 48.06742 | 0.0221130 | 0.0178133 | 0.0233415 | 0.0196560 | 0.0207310 |
10% | locf | 48.06388 | 48.04010 | 48.09277 | 48.06879 | 48.06722 | 48.06639 | 0.0589681 | 0.0485258 | 0.0558968 | 0.0472973 | 0.0526720 |
20% | interpolation | 48.06431 | 48.03889 | 48.09359 | 48.06921 | 48.06723 | 48.06650 | 0.0872236 | 0.0896806 | 0.0755528 | 0.0853808 | 0.0844595 |
30% | ma | 48.06320 | 48.04018 | 48.09408 | 48.06792 | 48.06739 | 48.06635 | 0.1566339 | 0.1345209 | 0.1547912 | 0.1689189 | 0.1537162 |
20% | mean | 48.07973 | 48.02832 | 48.08681 | 48.08731 | 48.06748 | 48.07054 | 0.2782555 | 0.2721130 | 0.2395577 | 0.2800983 | 0.2675061 |
65% | ma | 48.07108 | 48.04394 | 48.09041 | 48.07027 | 48.06821 | 48.06892 | 0.3353808 | 0.2764128 | 0.2647420 | 0.3101966 | 0.2966830 |
65% | kalman | 48.07038 | 48.04468 | 48.08991 | 48.07061 | 48.06840 | 48.06890 | 0.3114251 | 0.2684275 | 0.2450860 | 0.2911548 | 0.2790233 |
30% | locf | 48.06791 | 48.04236 | 48.09580 | 48.06738 | 48.06851 | 48.06836 | 0.1621622 | 0.1461916 | 0.1541769 | 0.1658477 | 0.1570946 |
65% | interpolation | 48.07036 | 48.04468 | 48.08991 | 48.07116 | 48.06858 | 48.06903 | 0.3095823 | 0.2659705 | 0.2457002 | 0.2948403 | 0.2790233 |
10% | mean | 48.07145 | 48.03286 | 48.10848 | 48.06614 | 48.06916 | 48.06973 | 0.1726044 | 0.1603194 | 0.1572482 | 0.1664619 | 0.1641585 |
80% | ma | 48.06795 | 48.04561 | 48.09361 | 48.06843 | 48.06922 | 48.06890 | 0.4398034 | 0.3808354 | 0.3863636 | 0.4391892 | 0.4115479 |
80% | interpolation | 48.06756 | 48.04682 | 48.09353 | 48.06823 | 48.06952 | 48.06903 | 0.4250614 | 0.3525799 | 0.3789926 | 0.4244472 | 0.3952703 |
80% | kalman | 48.06761 | 48.04682 | 48.09357 | 48.06821 | 48.06953 | 48.06905 | 0.4256757 | 0.3519656 | 0.3820639 | 0.4256757 | 0.3963452 |
50% | mean | 48.10011 | 48.05789 | 48.10226 | 48.06800 | 48.07605 | 48.08206 | 0.4484029 | 0.3900491 | 0.3832924 | 0.4318182 | 0.4133907 |
30% | mean | 48.07771 | 48.04385 | 48.10906 | 48.09428 | 48.08240 | 48.08122 | 0.3642506 | 0.3507371 | 0.3224816 | 0.3869779 | 0.3561118 |
65% | mean | 48.06675 | 48.02981 | 48.12859 | 48.11165 | 48.09002 | 48.08420 | 0.4367322 | 0.3814496 | 0.3783784 | 0.4176904 | 0.4035627 |
85% | mean | 48.11939 | 48.09357 | 48.10504 | 48.10364 | 48.10075 | 48.10541 | 0.2260442 | 0.1984029 | 0.2125307 | 0.2278870 | 0.2162162 |
90% | mean | 48.03712 | 48.06868 | 48.06397 | 48.17040 | 48.10102 | 48.08504 | 0.1572482 | 0.1547912 | 0.1480344 | 0.1658477 | 0.1564803 |
70% | random | 47.68556 | 47.12402 | 47.95719 | 49.26794 | 48.11638 | 48.00868 | 0.9324324 | 0.9262899 | 0.9146192 | 0.9275184 | 0.9252150 |
80% | mean | 48.05587 | 48.05459 | 48.17520 | 48.12306 | 48.11762 | 48.10218 | 0.8802211 | 0.2708845 | 0.8157248 | 0.2954545 | 0.5655713 |
10% | random | 47.91482 | 48.20938 | 48.19080 | 48.04397 | 48.14805 | 48.08974 | 0.1848894 | 0.1025799 | 0.2291155 | 0.1633907 | 0.1699939 |
85% | random | 49.15168 | 47.72769 | 49.75232 | 47.47577 | 48.31860 | 48.52687 | 0.9895577 | 0.9889435 | 0.9631450 | 0.9649877 | 0.9766585 |
65% | random | 47.50568 | 48.49940 | 49.01626 | 47.85631 | 48.45733 | 48.21942 | 0.8832924 | 0.6977887 | 0.9195332 | 0.8298526 | 0.8326167 |
80% | random | 49.44640 | 48.28223 | 49.41269 | 47.95915 | 48.55136 | 48.77512 | 0.9686732 | 0.9705160 | 0.9017199 | 0.9133907 | 0.9385749 |
## Gather the per-level Amelia results into two lists whose element names are
## the percentage labels ('1%', '10%', ..., '90%'); presumably these are the
## missing-value rates -- confirm against where the objects are created.
## `dfm1_amelia` holds the `data_m1_*_amelia` objects.
dfm1_amelia <- setNames(
  list(data_m1_1_amelia, data_m1_10_amelia, data_m1_20_amelia,
       data_m1_30_amelia, data_m1_50_amelia, data_m1_65_amelia,
       data_m1_70_amelia, data_m1_80_amelia, data_m1_85_amelia,
       data_m1_90_amelia),
  c('1%', '10%', '20%', '30%', '50%', '65%', '70%', '80%', '85%', '90%')
)
## `dfm2_amelia` holds the `data_tm1_*_amelia` objects, in the same order.
dfm2_amelia <- setNames(
  list(data_tm1_1_amelia, data_tm1_10_amelia, data_tm1_20_amelia,
       data_tm1_30_amelia, data_tm1_50_amelia, data_tm1_65_amelia,
       data_tm1_70_amelia, data_tm1_80_amelia, data_tm1_85_amelia,
       data_tm1_90_amelia),
  c('1%', '10%', '20%', '30%', '50%', '65%', '70%', '80%', '85%', '90%')
)
## Summarise
## Stack every list element into one table per list. Inside each element the
## existing `.id` column is renamed to `Model` first, so it does not clash with
## the `.id` column that `ldply()` adds from the list names.
## `bias` is the plain average of the four per-price bias columns; rows are
## then ordered by Mean.HLC, Mean.OHLC and bias.
## `tibble::as_tibble()` is used instead of `tbl_df()`, which dplyr deprecated
## in 1.0.0; both produce the same tibble.
dfm1_amelia %<>%
  ldply(function(x) x %>% dplyr::rename(Model = .id)) %>%
  tibble::as_tibble() %>%
  mutate(bias = (bias.open + bias.high + bias.low + bias.close) / 4) %>%
  arrange(Mean.HLC, Mean.OHLC, bias)

## Same summary for the `data_tm1_*` based list.
dfm2_amelia %<>%
  ldply(function(x) x %>% dplyr::rename(Model = .id)) %>%
  tibble::as_tibble() %>%
  mutate(bias = (bias.open + bias.high + bias.low + bias.close) / 4) %>%
  arrange(Mean.HLC, Mean.OHLC, bias)
## Print the `dfm1_amelia` summary as a styled table inside a scrollable box
## (full width, 400px tall).
dfm1_amelia %>%
  kable(caption = 'MSE 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%', height = '400px')
.id | Model | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close | bias |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1% | imp3 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000002 | 0.0000001 | 0.0000001 | 0.0036369 | 0.0033571 | 0.0030774 | 0.0032172 | 0.0033221 |
1% | imp5 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000002 | 0.0000001 | 0.0000001 | 0.0043363 | 0.0043363 | 0.0034970 | 0.0040565 | 0.0040565 |
1% | imp2 | 0.0000000 | 0.0000001 | 0.0000000 | 0.0000002 | 0.0000001 | 0.0000001 | 0.0032172 | 0.0034970 | 0.0029375 | 0.0036369 | 0.0033221 |
1% | imp4 | 0.0000000 | 0.0000001 | 0.0000000 | 0.0000002 | 0.0000001 | 0.0000001 | 0.0037768 | 0.0036369 | 0.0029375 | 0.0032172 | 0.0033921 |
1% | imp1 | 0.0000000 | 0.0000001 | 0.0000001 | 0.0000003 | 0.0000001 | 0.0000001 | 0.0039166 | 0.0034970 | 0.0030774 | 0.0029375 | 0.0033571 |
10% | imp2 | 0.0000011 | 0.0000009 | 0.0000011 | 0.0000009 | 0.0000010 | 0.0000010 | 0.0432228 | 0.0395860 | 0.0372080 | 0.0365086 | 0.0391313 |
10% | imp5 | 0.0000011 | 0.0000011 | 0.0000011 | 0.0000009 | 0.0000010 | 0.0000011 | 0.0416842 | 0.0401455 | 0.0348300 | 0.0359491 | 0.0381522 |
10% | imp4 | 0.0000012 | 0.0000011 | 0.0000011 | 0.0000010 | 0.0000011 | 0.0000011 | 0.0402854 | 0.0415443 | 0.0345503 | 0.0379074 | 0.0385718 |
10% | imp3 | 0.0000009 | 0.0000012 | 0.0000011 | 0.0000010 | 0.0000011 | 0.0000011 | 0.0440621 | 0.0432228 | 0.0370681 | 0.0381872 | 0.0406351 |
10% | imp1 | 0.0000010 | 0.0000013 | 0.0000011 | 0.0000011 | 0.0000012 | 0.0000011 | 0.0430829 | 0.0442020 | 0.0348300 | 0.0386068 | 0.0401804 |
20% | imp5 | 0.0000042 | 0.0000038 | 0.0000037 | 0.0000034 | 0.0000036 | 0.0000038 | 0.0959575 | 0.0844873 | 0.0770737 | 0.0706393 | 0.0820394 |
20% | imp2 | 0.0000050 | 0.0000039 | 0.0000036 | 0.0000037 | 0.0000037 | 0.0000040 | 0.0952581 | 0.0830885 | 0.0769338 | 0.0706393 | 0.0814799 |
20% | imp4 | 0.0000044 | 0.0000039 | 0.0000040 | 0.0000035 | 0.0000038 | 0.0000039 | 0.0952581 | 0.0853266 | 0.0769338 | 0.0730172 | 0.0826339 |
20% | imp3 | 0.0000046 | 0.0000035 | 0.0000045 | 0.0000038 | 0.0000039 | 0.0000041 | 0.0951182 | 0.0811302 | 0.0819695 | 0.0742761 | 0.0831235 |
20% | imp1 | 0.0000051 | 0.0000039 | 0.0000044 | 0.0000039 | 0.0000041 | 0.0000043 | 0.0969366 | 0.0854665 | 0.0762344 | 0.0698000 | 0.0821094 |
30% | imp2 | 0.0000106 | 0.0000077 | 0.0000092 | 0.0000080 | 0.0000083 | 0.0000089 | 0.1419779 | 0.1198769 | 0.1221150 | 0.1113442 | 0.1238285 |
30% | imp3 | 0.0000105 | 0.0000079 | 0.0000094 | 0.0000080 | 0.0000084 | 0.0000090 | 0.1437963 | 0.1226745 | 0.1253322 | 0.1134424 | 0.1263114 |
30% | imp5 | 0.0000117 | 0.0000083 | 0.0000096 | 0.0000082 | 0.0000087 | 0.0000094 | 0.1407190 | 0.1202965 | 0.1254721 | 0.1179186 | 0.1261016 |
30% | imp4 | 0.0000112 | 0.0000089 | 0.0000092 | 0.0000081 | 0.0000088 | 0.0000094 | 0.1421178 | 0.1278500 | 0.1281298 | 0.1249126 | 0.1307526 |
30% | imp1 | 0.0000112 | 0.0000086 | 0.0000097 | 0.0000082 | 0.0000088 | 0.0000094 | 0.1486921 | 0.1268709 | 0.1289691 | 0.1187579 | 0.1308225 |
50% | imp1 | 0.0002917 | 0.0002841 | 0.0002886 | 0.0002930 | 0.0002886 | 0.0002894 | 0.2471674 | 0.2261855 | 0.1993286 | 0.1994685 | 0.2180375 |
50% | imp4 | 0.0004111 | 0.0004062 | 0.0004048 | 0.0004095 | 0.0004068 | 0.0004079 | 0.2572388 | 0.2260456 | 0.2201707 | 0.2075815 | 0.2277591 |
50% | imp2 | 0.0004936 | 0.0004858 | 0.0004941 | 0.0004908 | 0.0004902 | 0.0004911 | 0.2330396 | 0.2022661 | 0.2007274 | 0.1881382 | 0.2060428 |
50% | imp5 | 0.0005000 | 0.0004896 | 0.0005036 | 0.0004981 | 0.0004971 | 0.0004978 | 0.2285634 | 0.2045041 | 0.2000280 | 0.1926144 | 0.2064275 |
50% | imp3 | 0.0005796 | 0.0005732 | 0.0005761 | 0.0005780 | 0.0005758 | 0.0005767 | 0.2238075 | 0.2071618 | 0.1945727 | 0.1948524 | 0.2050986 |
65% | imp1 | 0.0037038 | 0.0036950 | 0.0037028 | 0.0037144 | 0.0037040 | 0.0037040 | 0.3534760 | 0.3313750 | 0.3049378 | 0.3376696 | 0.3318646 |
65% | imp2 | 0.0037461 | 0.0037553 | 0.0037236 | 0.0037351 | 0.0037380 | 0.0037400 | 0.3432648 | 0.3056372 | 0.2940271 | 0.3113722 | 0.3135753 |
65% | imp4 | 0.0040114 | 0.0039923 | 0.0040178 | 0.0040065 | 0.0040055 | 0.0040070 | 0.3424255 | 0.3155686 | 0.2971045 | 0.3151490 | 0.3175619 |
65% | imp3 | 0.0040437 | 0.0040587 | 0.0040400 | 0.0040502 | 0.0040496 | 0.0040481 | 0.3513778 | 0.3303959 | 0.2906700 | 0.3392083 | 0.3279130 |
65% | imp5 | 0.0041922 | 0.0041956 | 0.0041963 | 0.0042075 | 0.0041998 | 0.0041979 | 0.3347321 | 0.3033991 | 0.2861939 | 0.3043782 | 0.3071758 |
70% | imp2 | 0.0058556 | 0.0058747 | 0.0058369 | 0.0058674 | 0.0058597 | 0.0058586 | 0.4127850 | 0.3397678 | 0.3446636 | 0.3655057 | 0.3656805 |
70% | imp4 | 0.0062205 | 0.0061725 | 0.0061818 | 0.0061766 | 0.0061770 | 0.0061879 | 0.4052315 | 0.3361309 | 0.3291369 | 0.3536159 | 0.3560288 |
70% | imp3 | 0.0063261 | 0.0062797 | 0.0063244 | 0.0063016 | 0.0063019 | 0.0063080 | 0.4242551 | 0.3555742 | 0.3613093 | 0.3979578 | 0.3847741 |
70% | imp5 | 0.0063617 | 0.0063084 | 0.0063286 | 0.0063186 | 0.0063185 | 0.0063293 | 0.4506924 | 0.3659253 | 0.3615890 | 0.3909638 | 0.3922926 |
70% | imp1 | 0.0065354 | 0.0065133 | 0.0065083 | 0.0064779 | 0.0064999 | 0.0065087 | 0.4273325 | 0.3474612 | 0.3517975 | 0.3743181 | 0.3752273 |
80% | imp5 | 0.0171595 | 0.0171508 | 0.0172147 | 0.0171742 | 0.0171799 | 0.0171748 | 0.4348860 | 0.3748776 | 0.3765562 | 0.4386628 | 0.4062456 |
80% | imp4 | 0.0178892 | 0.0178482 | 0.0178864 | 0.0178254 | 0.0178533 | 0.0178623 | 0.4648203 | 0.4056511 | 0.3899846 | 0.4616030 | 0.4305148 |
80% | imp1 | 0.0179774 | 0.0179669 | 0.0179299 | 0.0179521 | 0.0179496 | 0.0179566 | 0.4511120 | 0.3912435 | 0.3925024 | 0.4497132 | 0.4211428 |
80% | imp2 | 0.0181715 | 0.0182881 | 0.0182233 | 0.0182494 | 0.0182536 | 0.0182331 | 0.4567072 | 0.3929221 | 0.3660652 | 0.4357253 | 0.4128549 |
80% | imp3 | 0.0188257 | 0.0189006 | 0.0189500 | 0.0188341 | 0.0188949 | 0.0188776 | 0.4779689 | 0.4242551 | 0.4165618 | 0.5053854 | 0.4560428 |
85% | imp2 | 0.0285124 | 0.0284944 | 0.0284871 | 0.0284606 | 0.0284807 | 0.0284886 | 0.5101413 | 0.4222968 | 0.4008952 | 0.4583858 | 0.4479298 |
85% | imp4 | 0.0295915 | 0.0292050 | 0.0294749 | 0.0293755 | 0.0293518 | 0.0294117 | 0.5662330 | 0.4571269 | 0.4434187 | 0.5016086 | 0.4920968 |
85% | imp1 | 0.0296854 | 0.0294646 | 0.0295183 | 0.0295090 | 0.0294973 | 0.0295443 | 0.5305637 | 0.4369842 | 0.4357253 | 0.5021681 | 0.4763603 |
85% | imp5 | 0.0296229 | 0.0295257 | 0.0296607 | 0.0295922 | 0.0295929 | 0.0296004 | 0.5403553 | 0.4505525 | 0.4560078 | 0.5295846 | 0.4941251 |
85% | imp3 | 0.0309386 | 0.0309092 | 0.0309865 | 0.0308879 | 0.0309279 | 0.0309305 | 0.5620366 | 0.4683172 | 0.4411806 | 0.5178347 | 0.4973423 |
90% | imp5 | 0.0440889 | 0.0441617 | 0.0444101 | 0.0440200 | 0.0441973 | 0.0441702 | 0.5168555 | 0.4466359 | 0.4127850 | 0.5109806 | 0.4718142 |
90% | imp2 | 0.0444710 | 0.0444534 | 0.0444560 | 0.0445331 | 0.0444808 | 0.0444784 | 0.5337809 | 0.4397818 | 0.4294307 | 0.4981116 | 0.4752763 |
90% | imp1 | 0.0461595 | 0.0461769 | 0.0463285 | 0.0462737 | 0.0462597 | 0.0462346 | 0.5740663 | 0.4841237 | 0.4734928 | 0.5550427 | 0.5216814 |
90% | imp3 | 0.0458608 | 0.0460663 | 0.0465595 | 0.0462600 | 0.0462953 | 0.0461867 | 0.5509862 | 0.4834243 | 0.4192195 | 0.5245489 | 0.4945447 |
90% | imp4 | 0.0462646 | 0.0464083 | 0.0464868 | 0.0463752 | 0.0464234 | 0.0463837 | 0.5572807 | 0.4809064 | 0.4618828 | 0.5704294 | 0.5176248 |
## Print the `dfm2_amelia` summary as a styled table inside a scrollable box
## (full width, 400px tall).
## NOTE(review): the values printed for this table are around 48, while the
## caption is the same 'MSE' caption used for `dfm1_amelia` -- confirm it fits.
dfm2_amelia %>%
  kable(caption = 'MSE 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%', height = '400px')
.id | Model | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close | bias |
---|---|---|---|---|---|---|---|---|---|---|---|---|
90% | imp5 | 48.06921 | 48.04484 | 48.06735 | 48.03865 | 48.05028 | 48.05501 | 0.4299754 | 0.2929975 | 0.3108108 | 0.3882064 | 0.3554975 |
90% | imp3 | 48.07160 | 48.04393 | 48.06767 | 48.04916 | 48.05359 | 48.05809 | 0.3452088 | 0.3132678 | 0.2463145 | 0.3003686 | 0.3012899 |
90% | imp1 | 48.08072 | 48.03486 | 48.07313 | 48.05499 | 48.05433 | 48.06093 | 0.4189189 | 0.3409091 | 0.3114251 | 0.3544226 | 0.3564189 |
70% | imp3 | 48.05517 | 48.02155 | 48.08947 | 48.06446 | 48.05849 | 48.05766 | 0.3187961 | 0.3175676 | 0.2911548 | 0.3402948 | 0.3169533 |
65% | imp3 | 48.06148 | 48.03451 | 48.08888 | 48.06000 | 48.06113 | 48.06122 | 0.3132678 | 0.2727273 | 0.2880835 | 0.2788698 | 0.2882371 |
80% | imp3 | 48.06543 | 48.03745 | 48.09131 | 48.05879 | 48.06252 | 48.06325 | 0.4778870 | 0.3384521 | 0.3673219 | 0.2868550 | 0.3676290 |
70% | imp4 | 48.05450 | 48.03275 | 48.09509 | 48.06249 | 48.06344 | 48.06121 | 0.3243243 | 0.2628993 | 0.2807125 | 0.2585995 | 0.2816339 |
65% | imp5 | 48.06606 | 48.03924 | 48.08798 | 48.06334 | 48.06352 | 48.06416 | 0.3335381 | 0.2899263 | 0.2506143 | 0.2555283 | 0.2824017 |
70% | imp1 | 48.06299 | 48.03345 | 48.09202 | 48.06911 | 48.06486 | 48.06439 | 0.3378378 | 0.3015971 | 0.2702703 | 0.2850123 | 0.2986794 |
80% | imp2 | 48.09635 | 48.03502 | 48.09513 | 48.06611 | 48.06542 | 48.07315 | 0.4809582 | 0.4379607 | 0.3550369 | 0.3574939 | 0.4078624 |
90% | imp4 | 48.10228 | 48.04703 | 48.07421 | 48.07541 | 48.06555 | 48.07473 | 0.4576167 | 0.3851351 | 0.2585995 | 0.3538084 | 0.3637899 |
30% | imp2 | 48.06313 | 48.03819 | 48.09304 | 48.06734 | 48.06619 | 48.06543 | 0.1412776 | 0.1332924 | 0.1025799 | 0.1068796 | 0.1210074 |
10% | imp4 | 48.06482 | 48.03913 | 48.09300 | 48.06658 | 48.06624 | 48.06588 | 0.0466830 | 0.0386978 | 0.0417690 | 0.0350123 | 0.0405405 |
1% | imp2 | 48.06482 | 48.03929 | 48.09266 | 48.06683 | 48.06626 | 48.06590 | 0.0055283 | 0.0042998 | 0.0055283 | 0.0049140 | 0.0050676 |
10% | imp2 | 48.06486 | 48.03910 | 48.09271 | 48.06702 | 48.06627 | 48.06592 | 0.0423833 | 0.0393120 | 0.0337838 | 0.0337838 | 0.0373157 |
10% | imp5 | 48.06506 | 48.03883 | 48.09322 | 48.06684 | 48.06630 | 48.06599 | 0.0565111 | 0.0466830 | 0.0448403 | 0.0399263 | 0.0469902 |
1% | imp5 | 48.06493 | 48.03940 | 48.09263 | 48.06691 | 48.06631 | 48.06597 | 0.0049140 | 0.0042998 | 0.0049140 | 0.0042998 | 0.0046069 |
1% | imp3 | 48.06477 | 48.03933 | 48.09253 | 48.06716 | 48.06634 | 48.06595 | 0.0036855 | 0.0036855 | 0.0024570 | 0.0024570 | 0.0030713 |
1% | imp1 | 48.06482 | 48.03931 | 48.09267 | 48.06713 | 48.06637 | 48.06598 | 0.0055283 | 0.0055283 | 0.0030713 | 0.0030713 | 0.0042998 |
1% | imp4 | 48.06483 | 48.03946 | 48.09270 | 48.06697 | 48.06638 | 48.06599 | 0.0036855 | 0.0030713 | 0.0042998 | 0.0036855 | 0.0036855 |
10% | imp1 | 48.06460 | 48.03961 | 48.09248 | 48.06715 | 48.06642 | 48.06596 | 0.0479115 | 0.0337838 | 0.0411548 | 0.0307125 | 0.0383907 |
10% | imp3 | 48.06492 | 48.03909 | 48.09336 | 48.06684 | 48.06643 | 48.06605 | 0.0472973 | 0.0411548 | 0.0509828 | 0.0485258 | 0.0469902 |
30% | imp3 | 48.06348 | 48.03940 | 48.09352 | 48.06649 | 48.06647 | 48.06572 | 0.1308354 | 0.1210074 | 0.1081081 | 0.1093366 | 0.1173219 |
20% | imp4 | 48.06378 | 48.03896 | 48.09348 | 48.06752 | 48.06665 | 48.06593 | 0.0921376 | 0.0804668 | 0.0878378 | 0.0853808 | 0.0864558 |
65% | imp2 | 48.06781 | 48.03969 | 48.09423 | 48.06629 | 48.06673 | 48.06700 | 0.3347666 | 0.2819410 | 0.2813268 | 0.2647420 | 0.2906941 |
30% | imp5 | 48.06414 | 48.03950 | 48.09303 | 48.06767 | 48.06673 | 48.06608 | 0.1400491 | 0.1265356 | 0.1056511 | 0.1068796 | 0.1197789 |
20% | imp2 | 48.06493 | 48.03925 | 48.09323 | 48.06796 | 48.06682 | 48.06635 | 0.0921376 | 0.0884521 | 0.0823096 | 0.0902948 | 0.0882985 |
20% | imp3 | 48.06402 | 48.04021 | 48.09313 | 48.06711 | 48.06682 | 48.06612 | 0.0902948 | 0.0743243 | 0.0939803 | 0.0859951 | 0.0861486 |
30% | imp1 | 48.06165 | 48.03872 | 48.09386 | 48.06788 | 48.06682 | 48.06553 | 0.1425061 | 0.1314496 | 0.1173219 | 0.1216216 | 0.1282248 |
20% | imp5 | 48.06468 | 48.03944 | 48.09354 | 48.06753 | 48.06684 | 48.06630 | 0.1007371 | 0.0786241 | 0.0853808 | 0.0687961 | 0.0833845 |
20% | imp1 | 48.06460 | 48.03932 | 48.09376 | 48.06750 | 48.06686 | 48.06629 | 0.1056511 | 0.0970516 | 0.0970516 | 0.0982801 | 0.0995086 |
30% | imp4 | 48.06310 | 48.04006 | 48.09321 | 48.06804 | 48.06711 | 48.06610 | 0.1351351 | 0.1283784 | 0.1062654 | 0.1142506 | 0.1210074 |
65% | imp1 | 48.06753 | 48.03945 | 48.09411 | 48.06812 | 48.06723 | 48.06730 | 0.3519656 | 0.3028256 | 0.2714988 | 0.2542998 | 0.2951474 |
65% | imp4 | 48.06864 | 48.03979 | 48.09638 | 48.06667 | 48.06761 | 48.06787 | 0.3464373 | 0.2929975 | 0.3015971 | 0.2745700 | 0.3039005 |
50% | imp5 | 48.06570 | 48.04092 | 48.09609 | 48.06943 | 48.06881 | 48.06803 | 0.2506143 | 0.2346437 | 0.2223587 | 0.2297297 | 0.2343366 |
70% | imp2 | 48.06721 | 48.03401 | 48.10299 | 48.07360 | 48.07020 | 48.06945 | 0.3310811 | 0.3065111 | 0.2788698 | 0.3003686 | 0.3042076 |
50% | imp3 | 48.06598 | 48.04386 | 48.09902 | 48.06832 | 48.07040 | 48.06930 | 0.2156020 | 0.1971744 | 0.2082310 | 0.2100737 | 0.2077703 |
50% | imp2 | 48.06819 | 48.04509 | 48.09811 | 48.07183 | 48.07168 | 48.07080 | 0.2340295 | 0.2235872 | 0.2094595 | 0.2260442 | 0.2232801 |
50% | imp4 | 48.06758 | 48.04450 | 48.09766 | 48.07323 | 48.07180 | 48.07074 | 0.2143735 | 0.2039312 | 0.1947174 | 0.2063882 | 0.2048526 |
90% | imp2 | 48.10489 | 48.05921 | 48.08469 | 48.07178 | 48.07189 | 48.08014 | 0.4600737 | 0.3789926 | 0.2911548 | 0.3568796 | 0.3717752 |
80% | imp4 | 48.07939 | 48.04066 | 48.10873 | 48.06645 | 48.07195 | 48.07381 | 0.4662162 | 0.3765356 | 0.4041769 | 0.4011057 | 0.4120086 |
50% | imp1 | 48.06908 | 48.04628 | 48.10127 | 48.07136 | 48.07297 | 48.07200 | 0.2192875 | 0.1934889 | 0.2039312 | 0.2027027 | 0.2048526 |
70% | imp5 | 48.06417 | 48.04374 | 48.09782 | 48.08140 | 48.07432 | 48.07178 | 0.2997543 | 0.2972973 | 0.2899263 | 0.3230958 | 0.3025184 |
80% | imp5 | 48.08769 | 48.05208 | 48.11440 | 48.07758 | 48.08135 | 48.08294 | 0.3980344 | 0.3470516 | 0.3200246 | 0.3304668 | 0.3488943 |
85% | imp5 | 48.05452 | 48.04422 | 48.10806 | 48.09895 | 48.08375 | 48.07644 | 0.4299754 | 0.4103194 | 0.3869779 | 0.4434889 | 0.4176904 |
85% | imp3 | 48.06908 | 48.06095 | 48.10012 | 48.10192 | 48.08767 | 48.08302 | 0.3568796 | 0.3396806 | 0.3218673 | 0.3488943 | 0.3418305 |
85% | imp4 | 48.07092 | 48.06230 | 48.11810 | 48.08630 | 48.08890 | 48.08441 | 0.4459459 | 0.3200246 | 0.3495086 | 0.2696560 | 0.3462838 |
80% | imp1 | 48.09547 | 48.06079 | 48.12047 | 48.08773 | 48.08966 | 48.09112 | 0.3599509 | 0.3396806 | 0.3065111 | 0.3476658 | 0.3384521 |
85% | imp2 | 48.07333 | 48.06250 | 48.11384 | 48.10463 | 48.09366 | 48.08858 | 0.3562654 | 0.3427518 | 0.3200246 | 0.3839066 | 0.3507371 |
85% | imp1 | 48.06422 | 48.06042 | 48.11247 | 48.11366 | 48.09552 | 48.08769 | 0.3765356 | 0.3568796 | 0.3427518 | 0.3820639 | 0.3645577 |
tidyr::fill
弥补数据偏差比较
## Gather the per-level `data_m1_*_tidyr` results into a list whose element
## names are the percentage labels ('1%', '10%', ..., '90%').
## The section-heading text above was fused onto this assignment; it now sits
## on its own line so the list is bound to `dfm1_tidyr`, the name the
## summarise step below reads.
dfm1_tidyr <- list(`1%` = data_m1_1_tidyr, `10%` = data_m1_10_tidyr,
                   `20%` = data_m1_20_tidyr, `30%` = data_m1_30_tidyr,
                   `50%` = data_m1_50_tidyr, `65%` = data_m1_65_tidyr,
                   `70%` = data_m1_70_tidyr, `80%` = data_m1_80_tidyr,
                   `85%` = data_m1_85_tidyr, `90%` = data_m1_90_tidyr)
## Gather the per-level `data_tm1_*_tidyr` results into a list whose element
## names are the percentage labels ('1%', '10%', ..., '90%').
dfm2_tidyr <- setNames(
  list(data_tm1_1_tidyr, data_tm1_10_tidyr, data_tm1_20_tidyr,
       data_tm1_30_tidyr, data_tm1_50_tidyr, data_tm1_65_tidyr,
       data_tm1_70_tidyr, data_tm1_80_tidyr, data_tm1_85_tidyr,
       data_tm1_90_tidyr),
  c('1%', '10%', '20%', '30%', '50%', '65%', '70%', '80%', '85%', '90%')
)
## Summarise
## Stack every list element into one table per list; `ldply()` adds the `.id`
## column from the list names. Each element is tagged with a constant `Model`
## factor ('tidyr_fill').
## `bias` is the plain average of the four per-price bias columns. The columns
## are then put into the fixed order listed in `select()`, and rows are ordered
## by Mean.HLC, Mean.OHLC and bias.
## `tibble::as_tibble()` is used instead of `tbl_df()`, which dplyr deprecated
## in 1.0.0; both produce the same tibble.
dfm1_tidyr %<>%
  ldply(function(x) x %>% mutate(Model = factor('tidyr_fill'))) %>%
  tibble::as_tibble() %>%
  mutate(bias = (bias.open + bias.high + bias.low + bias.close) / 4) %>%
  dplyr::select(.id, Model, AskOpen, AskHigh, AskLow, AskClose,
                Mean.HLC, Mean.OHLC, bias.open, bias.high, bias.low,
                bias.close, bias) %>%
  arrange(Mean.HLC, Mean.OHLC, bias)

## Same summary for the `data_tm1_*` based list.
dfm2_tidyr %<>%
  ldply(function(x) x %>% mutate(Model = factor('tidyr_fill'))) %>%
  tibble::as_tibble() %>%
  mutate(bias = (bias.open + bias.high + bias.low + bias.close) / 4) %>%
  dplyr::select(.id, Model, AskOpen, AskHigh, AskLow, AskClose,
                Mean.HLC, Mean.OHLC, bias.open, bias.high, bias.low,
                bias.close, bias) %>%
  arrange(Mean.HLC, Mean.OHLC, bias)
## Print the `dfm1_tidyr` summary as a styled table inside a scrollable box
## (full width, 400px tall).
dfm1_tidyr %>%
  kable(caption = 'MSE 缺失值') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%', height = '400px')
.id | Model | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close | bias |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1% | tidyr_fill | 0.0000014 | 0.0000031 | 0.0000018 | 0.0000036 | 0.0000029 | 0.0000025 | 0.0037768 | 0.0053154 | 0.0043363 | 0.0058749 | 0.0048258 |
10% | tidyr_fill | 0.0000190 | 0.0000160 | 0.0000167 | 0.0000208 | 0.0000178 | 0.0000181 | 0.0530144 | 0.0507763 | 0.0471395 | 0.0464401 | 0.0493426 |
20% | tidyr_fill | 0.0000526 | 0.0000327 | 0.0000319 | 0.0000438 | 0.0000362 | 0.0000403 | 0.1105050 | 0.0969366 | 0.0981956 | 0.0916212 | 0.0993146 |
30% | tidyr_fill | 0.0000830 | 0.0000646 | 0.0000672 | 0.0000785 | 0.0000701 | 0.0000733 | 0.1658973 | 0.1559659 | 0.1465939 | 0.1577843 | 0.1565604 |
50% | tidyr_fill | 0.0001652 | 0.0001420 | 0.0001480 | 0.0001916 | 0.0001606 | 0.0001617 | 0.2764023 | 0.2480067 | 0.2397538 | 0.2771017 | 0.2603161 |
65% | tidyr_fill | 0.0002955 | 0.0003304 | 0.0002383 | 0.0002986 | 0.0002891 | 0.0002907 | 0.3817317 | 0.3404672 | 0.3154287 | 0.3776752 | 0.3538257 |
70% | tidyr_fill | 0.0003904 | 0.0003374 | 0.0003273 | 0.0004098 | 0.0003581 | 0.0003662 | 0.4253742 | 0.3855085 | 0.3597706 | 0.4227165 | 0.3983424 |
80% | tidyr_fill | 0.0007388 | 0.0005612 | 0.0005608 | 0.0006582 | 0.0005934 | 0.0006297 | 0.5302840 | 0.4497132 | 0.4574066 | 0.5218912 | 0.4898238 |
85% | tidyr_fill | 0.0009119 | 0.0007365 | 0.0009503 | 0.0008236 | 0.0008368 | 0.0008556 | 0.5592391 | 0.4825850 | 0.4667786 | 0.5409148 | 0.5123794 |
90% | tidyr_fill | 0.0015472 | 0.0012908 | 0.0012546 | 0.0015022 | 0.0013492 | 0.0013987 | 0.6398098 | 0.5505665 | 0.5153168 | 0.6048398 | 0.5776332 |
## Render the second tidyr-fill result table (`dfm2_tidyr`) with the same
## styling: striped rows inside a fixed-height scroll box.
## NOTE(review): the caption says MSE, but the values rendered for this table
## are around 48 rather than close to zero -- confirm what these columns hold.
scroll_box(
  kable_styling(
    kable(dfm2_tidyr, caption = 'MSE 缺失值'),
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ),
  width = '100%',
  height = '400px'
)
.id | Model | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close | bias |
---|---|---|---|---|---|---|---|---|---|---|---|---|
90% | tidyr_fill | 48.09342 | 48.02838 | 48.08001 | 48.02318 | 48.04386 | 48.05625 | 0.5368550 | 0.5018428 | 0.5122850 | 0.6375921 | 0.5471437 |
85% | tidyr_fill | 48.06139 | 48.04274 | 48.09568 | 48.04604 | 48.06149 | 48.06146 | 0.5479115 | 0.4570025 | 0.5012285 | 0.5491400 | 0.5138206 |
80% | tidyr_fill | 48.07033 | 48.03224 | 48.09007 | 48.06271 | 48.06167 | 48.06384 | 0.4932432 | 0.4404177 | 0.3912776 | 0.4551597 | 0.4450246 |
70% | tidyr_fill | 48.05579 | 48.03106 | 48.07434 | 48.08234 | 48.06258 | 48.06088 | 0.4029484 | 0.4004914 | 0.3003686 | 0.4054054 | 0.3773034 |
50% | tidyr_fill | 48.06433 | 48.03798 | 48.09349 | 48.06476 | 48.06541 | 48.06514 | 0.2807125 | 0.2444717 | 0.2242015 | 0.2585995 | 0.2519963 |
1% | tidyr_fill | 48.06471 | 48.03943 | 48.09250 | 48.06706 | 48.06633 | 48.06592 | 0.0079853 | 0.0067568 | 0.0030713 | 0.0024570 | 0.0050676 |
65% | tidyr_fill | 48.06478 | 48.04620 | 48.08847 | 48.06434 | 48.06634 | 48.06595 | 0.3544226 | 0.3138821 | 0.2942260 | 0.3445946 | 0.3267813 |
20% | tidyr_fill | 48.06368 | 48.03972 | 48.09354 | 48.06721 | 48.06683 | 48.06604 | 0.1068796 | 0.0976658 | 0.0970516 | 0.0952088 | 0.0992015 |
10% | tidyr_fill | 48.06388 | 48.04010 | 48.09277 | 48.06879 | 48.06722 | 48.06639 | 0.0589681 | 0.0485258 | 0.0558968 | 0.0472973 | 0.0526720 |
30% | tidyr_fill | 48.06791 | 48.04236 | 48.09580 | 48.06738 | 48.06851 | 48.06836 | 0.1621622 | 0.1461916 | 0.1541769 | 0.1658477 | 0.1570946 |
## 1-min data.
## Stack the imputeTS, Amelia and tidyr result tables into one comparison table.
dfm1 <- list(dfm1_impTS, dfm1_amelia, dfm1_tidyr) %>% bind_rows

## Render the comparison grouped by missing rate (`.id`).
## The header-row spans are derived from the sorted data instead of being
## hard-coded as 12 rows per rate, so a missing or extra model for some rate
## cannot shift the group labels onto the wrong rows.
## `local()` keeps the helper variables out of the global environment; its
## value (the table) is still auto-printed by the chunk.
local({
  ## Sort the models within each missing rate; ddply() returns the groups as
  ## contiguous runs of rows. `as_tibble()` replaces the deprecated `tbl_df()`.
  cmp <- dfm1 %>%
    ddply(.(.id), arrange, Mean.HLC, Mean.OHLC, bias) %>%
    as_tibble()

  ## Header-row colour for each missing rate.
  css <- c(
    '1%'  = 'background-color: #e68a00; color: #fff;',
    '10%' = 'background-color: #ff0000; color: #fff;',
    '20%' = 'background-color: #bf80ff; color: #fff;',
    '30%' = 'background-color: #66ff33; color: #fff;',
    '50%' = 'background-color: #6666ff; color: #fff;',
    '65%' = 'background-color: #66e0ff; color: #fff;',
    '70%' = 'background-color:#0066ff; color: #fff;',
    '80%' = 'background-color: #ff9900; color: #fff;',
    '85%' = 'background-color: #33ff33; color: #fff;',
    '90%' = 'background-color: #339966; color: #fff;'
  )
  ## Used for a rate that has no colour assigned above
  ## (kableExtra's own default for `label_row_css`).
  css_default <- 'border-bottom: 1px solid;'

  ## First and last row of every run of identical `.id` values.
  runs <- rle(as.character(cmp$.id))
  last <- cumsum(runs$lengths)
  first <- last - runs$lengths + 1L

  out <- cmp %>%
    kable(caption = 'Bias Comparison') %>%
    kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive'))

  ## Namespace-qualified because dplyr also exports a `group_rows()`.
  for (i in seq_along(runs$values)) {
    rate <- runs$values[i]
    out <- kableExtra::group_rows(
      out, rate, first[i], last[i],
      label_row_css = if (rate %in% names(css)) css[[rate]] else css_default
    )
  }

  out %>% scroll_box(width = '100%', height = '400px')
})
.id | Model | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close | bias |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1% | ||||||||||||
1% | imp3 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000002 | 0.0000001 | 0.0000001 | 0.0036369 | 0.0033571 | 0.0030774 | 0.0032172 | 0.0033221 |
1% | imp5 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000002 | 0.0000001 | 0.0000001 | 0.0043363 | 0.0043363 | 0.0034970 | 0.0040565 | 0.0040565 |
1% | imp2 | 0.0000000 | 0.0000001 | 0.0000000 | 0.0000002 | 0.0000001 | 0.0000001 | 0.0032172 | 0.0034970 | 0.0029375 | 0.0036369 | 0.0033221 |
1% | imp4 | 0.0000000 | 0.0000001 | 0.0000000 | 0.0000002 | 0.0000001 | 0.0000001 | 0.0037768 | 0.0036369 | 0.0029375 | 0.0032172 | 0.0033921 |
1% | imp1 | 0.0000000 | 0.0000001 | 0.0000001 | 0.0000003 | 0.0000001 | 0.0000001 | 0.0039166 | 0.0034970 | 0.0030774 | 0.0029375 | 0.0033571 |
1% | kalman | 0.0000006 | 0.0000008 | 0.0000009 | 0.0000013 | 0.0000010 | 0.0000009 | 0.0050357 | 0.0064345 | 0.0050357 | 0.0064345 | 0.0057351 |
1% | interpolation | 0.0000006 | 0.0000008 | 0.0000009 | 0.0000013 | 0.0000010 | 0.0000009 | 0.0048958 | 0.0062946 | 0.0048958 | 0.0062946 | 0.0055952 |
1% | ma | 0.0000007 | 0.0000014 | 0.0000012 | 0.0000013 | 0.0000013 | 0.0000012 | 0.0079731 | 0.0088124 | 0.0067142 | 0.0086725 | 0.0080431 |
1% | locf | 0.0000014 | 0.0000031 | 0.0000018 | 0.0000036 | 0.0000029 | 0.0000025 | 0.0037768 | 0.0053154 | 0.0043363 | 0.0058749 | 0.0048258 |
1% | tidyr_fill | 0.0000014 | 0.0000031 | 0.0000018 | 0.0000036 | 0.0000029 | 0.0000025 | 0.0037768 | 0.0053154 | 0.0043363 | 0.0058749 | 0.0048258 |
1% | mean | 0.0005507 | 0.0005905 | 0.0004618 | 0.0006969 | 0.0005831 | 0.0005750 | 0.0194433 | 0.0179046 | 0.0205623 | 0.0194433 | 0.0193384 |
1% | random | 0.0024076 | 0.0006038 | 0.0023222 | 0.0008296 | 0.0012519 | 0.0015408 | 0.0145475 | 0.0093719 | 0.0195832 | 0.0145475 | 0.0145125 |
10% | ||||||||||||
10% | imp2 | 0.0000011 | 0.0000009 | 0.0000011 | 0.0000009 | 0.0000010 | 0.0000010 | 0.0432228 | 0.0395860 | 0.0372080 | 0.0365086 | 0.0391313 |
10% | imp5 | 0.0000011 | 0.0000011 | 0.0000011 | 0.0000009 | 0.0000010 | 0.0000011 | 0.0416842 | 0.0401455 | 0.0348300 | 0.0359491 | 0.0381522 |
10% | imp4 | 0.0000012 | 0.0000011 | 0.0000011 | 0.0000010 | 0.0000011 | 0.0000011 | 0.0402854 | 0.0415443 | 0.0345503 | 0.0379074 | 0.0385718 |
10% | imp3 | 0.0000009 | 0.0000012 | 0.0000011 | 0.0000010 | 0.0000011 | 0.0000011 | 0.0440621 | 0.0432228 | 0.0370681 | 0.0381872 | 0.0406351 |
10% | imp1 | 0.0000010 | 0.0000013 | 0.0000011 | 0.0000011 | 0.0000012 | 0.0000011 | 0.0430829 | 0.0442020 | 0.0348300 | 0.0386068 | 0.0401804 |
10% | interpolation | 0.0000090 | 0.0000071 | 0.0000075 | 0.0000095 | 0.0000080 | 0.0000083 | 0.0495174 | 0.0479787 | 0.0492377 | 0.0496573 | 0.0490978 |
10% | kalman | 0.0000090 | 0.0000071 | 0.0000075 | 0.0000096 | 0.0000081 | 0.0000083 | 0.0513359 | 0.0500769 | 0.0509162 | 0.0517555 | 0.0510211 |
10% | ma | 0.0000108 | 0.0000100 | 0.0000107 | 0.0000129 | 0.0000112 | 0.0000111 | 0.0672821 | 0.0636453 | 0.0614072 | 0.0665827 | 0.0647293 |
10% | locf | 0.0000190 | 0.0000160 | 0.0000167 | 0.0000208 | 0.0000178 | 0.0000181 | 0.0530144 | 0.0507763 | 0.0471395 | 0.0464401 | 0.0493426 |
10% | tidyr_fill | 0.0000190 | 0.0000160 | 0.0000167 | 0.0000208 | 0.0000178 | 0.0000181 | 0.0530144 | 0.0507763 | 0.0471395 | 0.0464401 | 0.0493426 |
10% | mean | 0.0057660 | 0.0063513 | 0.0060217 | 0.0066473 | 0.0063401 | 0.0061966 | 0.1719122 | 0.1493915 | 0.1814240 | 0.1751294 | 0.1694643 |
10% | random | 0.0077868 | 0.0226350 | 0.0087245 | 0.0291799 | 0.0201798 | 0.0170816 | 0.1092460 | 0.0870052 | 0.1375017 | 0.1214156 | 0.1137921 |
20% | ||||||||||||
20% | imp5 | 0.0000042 | 0.0000038 | 0.0000037 | 0.0000034 | 0.0000036 | 0.0000038 | 0.0959575 | 0.0844873 | 0.0770737 | 0.0706393 | 0.0820394 |
20% | imp2 | 0.0000050 | 0.0000039 | 0.0000036 | 0.0000037 | 0.0000037 | 0.0000040 | 0.0952581 | 0.0830885 | 0.0769338 | 0.0706393 | 0.0814799 |
20% | imp4 | 0.0000044 | 0.0000039 | 0.0000040 | 0.0000035 | 0.0000038 | 0.0000039 | 0.0952581 | 0.0853266 | 0.0769338 | 0.0730172 | 0.0826339 |
20% | imp3 | 0.0000046 | 0.0000035 | 0.0000045 | 0.0000038 | 0.0000039 | 0.0000041 | 0.0951182 | 0.0811302 | 0.0819695 | 0.0742761 | 0.0831235 |
20% | imp1 | 0.0000051 | 0.0000039 | 0.0000044 | 0.0000039 | 0.0000041 | 0.0000043 | 0.0969366 | 0.0854665 | 0.0762344 | 0.0698000 | 0.0821094 |
20% | interpolation | 0.0000229 | 0.0000146 | 0.0000154 | 0.0000201 | 0.0000167 | 0.0000182 | 0.1054693 | 0.0988950 | 0.0949783 | 0.0948384 | 0.0985453 |
20% | kalman | 0.0000229 | 0.0000146 | 0.0000154 | 0.0000202 | 0.0000167 | 0.0000183 | 0.1088264 | 0.1022521 | 0.0976360 | 0.0986152 | 0.1018324 |
20% | ma | 0.0000275 | 0.0000192 | 0.0000198 | 0.0000245 | 0.0000212 | 0.0000228 | 0.1292488 | 0.1209959 | 0.1112044 | 0.1222549 | 0.1209260 |
20% | locf | 0.0000526 | 0.0000327 | 0.0000319 | 0.0000438 | 0.0000362 | 0.0000403 | 0.1105050 | 0.0969366 | 0.0981956 | 0.0916212 | 0.0993146 |
20% | tidyr_fill | 0.0000526 | 0.0000327 | 0.0000319 | 0.0000438 | 0.0000362 | 0.0000403 | 0.1105050 | 0.0969366 | 0.0981956 | 0.0916212 | 0.0993146 |
20% | mean | 0.0123887 | 0.0117783 | 0.0116303 | 0.0126437 | 0.0120174 | 0.0121103 | 0.3066163 | 0.2652119 | 0.2957057 | 0.3014408 | 0.2922437 |
20% | random | 0.0308892 | 0.0318163 | 0.0327874 | 0.0353737 | 0.0333258 | 0.0327167 | 0.3551546 | 0.3561337 | 0.3119317 | 0.3477409 | 0.3427402 |
30% | ||||||||||||
30% | imp2 | 0.0000106 | 0.0000077 | 0.0000092 | 0.0000080 | 0.0000083 | 0.0000089 | 0.1419779 | 0.1198769 | 0.1221150 | 0.1113442 | 0.1238285 |
30% | imp3 | 0.0000105 | 0.0000079 | 0.0000094 | 0.0000080 | 0.0000084 | 0.0000090 | 0.1437963 | 0.1226745 | 0.1253322 | 0.1134424 | 0.1263114 |
30% | imp5 | 0.0000117 | 0.0000083 | 0.0000096 | 0.0000082 | 0.0000087 | 0.0000094 | 0.1407190 | 0.1202965 | 0.1254721 | 0.1179186 | 0.1261016 |
30% | imp4 | 0.0000112 | 0.0000089 | 0.0000092 | 0.0000081 | 0.0000088 | 0.0000094 | 0.1421178 | 0.1278500 | 0.1281298 | 0.1249126 | 0.1307526 |
30% | imp1 | 0.0000112 | 0.0000086 | 0.0000097 | 0.0000082 | 0.0000088 | 0.0000094 | 0.1486921 | 0.1268709 | 0.1289691 | 0.1187579 | 0.1308225 |
30% | interpolation | 0.0000374 | 0.0000247 | 0.0000282 | 0.0000342 | 0.0000290 | 0.0000311 | 0.1573647 | 0.1435166 | 0.1454749 | 0.1502308 | 0.1491467 |
30% | kalman | 0.0000373 | 0.0000247 | 0.0000282 | 0.0000344 | 0.0000291 | 0.0000312 | 0.1603021 | 0.1458945 | 0.1481326 | 0.1537278 | 0.1520143 |
30% | ma | 0.0000425 | 0.0000321 | 0.0000351 | 0.0000407 | 0.0000359 | 0.0000376 | 0.1805847 | 0.1612813 | 0.1653378 | 0.1772276 | 0.1711078 |
30% | locf | 0.0000830 | 0.0000646 | 0.0000672 | 0.0000785 | 0.0000701 | 0.0000733 | 0.1658973 | 0.1559659 | 0.1465939 | 0.1577843 | 0.1565604 |
30% | tidyr_fill | 0.0000830 | 0.0000646 | 0.0000672 | 0.0000785 | 0.0000701 | 0.0000733 | 0.1658973 | 0.1559659 | 0.1465939 | 0.1577843 | 0.1565604 |
30% | mean | 0.0190881 | 0.0174648 | 0.0182534 | 0.0190274 | 0.0182485 | 0.0184584 | 0.4018744 | 0.3536159 | 0.3729193 | 0.4076095 | 0.3840048 |
30% | random | 0.0891399 | 0.0503076 | 0.0991078 | 0.0534894 | 0.0676349 | 0.0730112 | 0.4879004 | 0.5048258 | 0.3189257 | 0.3983774 | 0.4275073 |
50% | ||||||||||||
50% | interpolation | 0.0000704 | 0.0000582 | 0.0000780 | 0.0000750 | 0.0000704 | 0.0000704 | 0.2460484 | 0.2282837 | 0.2087005 | 0.2559799 | 0.2347531 |
50% | kalman | 0.0000704 | 0.0000582 | 0.0000783 | 0.0000750 | 0.0000705 | 0.0000705 | 0.2530424 | 0.2322003 | 0.2149951 | 0.2632536 | 0.2408728 |
50% | ma | 0.0000804 | 0.0000680 | 0.0001001 | 0.0000872 | 0.0000851 | 0.0000839 | 0.2808784 | 0.2552805 | 0.2345783 | 0.2899706 | 0.2651769 |
50% | locf | 0.0001652 | 0.0001420 | 0.0001480 | 0.0001916 | 0.0001606 | 0.0001617 | 0.2764023 | 0.2480067 | 0.2397538 | 0.2771017 | 0.2603161 |
50% | tidyr_fill | 0.0001652 | 0.0001420 | 0.0001480 | 0.0001916 | 0.0001606 | 0.0001617 | 0.2764023 | 0.2480067 | 0.2397538 | 0.2771017 | 0.2603161 |
50% | imp1 | 0.0002917 | 0.0002841 | 0.0002886 | 0.0002930 | 0.0002886 | 0.0002894 | 0.2471674 | 0.2261855 | 0.1993286 | 0.1994685 | 0.2180375 |
50% | imp4 | 0.0004111 | 0.0004062 | 0.0004048 | 0.0004095 | 0.0004068 | 0.0004079 | 0.2572388 | 0.2260456 | 0.2201707 | 0.2075815 | 0.2277591 |
50% | imp2 | 0.0004936 | 0.0004858 | 0.0004941 | 0.0004908 | 0.0004902 | 0.0004911 | 0.2330396 | 0.2022661 | 0.2007274 | 0.1881382 | 0.2060428 |
50% | imp5 | 0.0005000 | 0.0004896 | 0.0005036 | 0.0004981 | 0.0004971 | 0.0004978 | 0.2285634 | 0.2045041 | 0.2000280 | 0.1926144 | 0.2064275 |
50% | imp3 | 0.0005796 | 0.0005732 | 0.0005761 | 0.0005780 | 0.0005758 | 0.0005767 | 0.2238075 | 0.2071618 | 0.1945727 | 0.1948524 | 0.2050986 |
50% | mean | 0.0306066 | 0.0306853 | 0.0305917 | 0.0302918 | 0.0305230 | 0.0305439 | 0.4684571 | 0.4066303 | 0.4129249 | 0.4671982 | 0.4388026 |
50% | random | 0.0401562 | 0.0407120 | 0.0546502 | 0.0518731 | 0.0490784 | 0.0468479 | 0.8181564 | 0.7682193 | 0.7954959 | 0.7911596 | 0.7932578 |
65% | ||||||||||||
65% | kalman | 0.0001282 | 0.0001238 | 0.0001133 | 0.0001202 | 0.0001191 | 0.0001214 | 0.3674640 | 0.3241013 | 0.2845153 | 0.3415862 | 0.3294167 |
65% | interpolation | 0.0001279 | 0.0001238 | 0.0001136 | 0.0001206 | 0.0001193 | 0.0001215 | 0.3662051 | 0.3241013 | 0.2831165 | 0.3394880 | 0.3282277 |
65% | ma | 0.0001512 | 0.0001488 | 0.0001313 | 0.0001449 | 0.0001417 | 0.0001441 | 0.3813121 | 0.3329137 | 0.3022800 | 0.3621486 | 0.3446636 |
65% | locf | 0.0002955 | 0.0003304 | 0.0002383 | 0.0002986 | 0.0002891 | 0.0002907 | 0.3817317 | 0.3404672 | 0.3154287 | 0.3776752 | 0.3538257 |
65% | tidyr_fill | 0.0002955 | 0.0003304 | 0.0002383 | 0.0002986 | 0.0002891 | 0.0002907 | 0.3817317 | 0.3404672 | 0.3154287 | 0.3776752 | 0.3538257 |
65% | imp1 | 0.0037038 | 0.0036950 | 0.0037028 | 0.0037144 | 0.0037040 | 0.0037040 | 0.3534760 | 0.3313750 | 0.3049378 | 0.3376696 | 0.3318646 |
65% | imp2 | 0.0037461 | 0.0037553 | 0.0037236 | 0.0037351 | 0.0037380 | 0.0037400 | 0.3432648 | 0.3056372 | 0.2940271 | 0.3113722 | 0.3135753 |
65% | imp4 | 0.0040114 | 0.0039923 | 0.0040178 | 0.0040065 | 0.0040055 | 0.0040070 | 0.3424255 | 0.3155686 | 0.2971045 | 0.3151490 | 0.3175619 |
65% | imp3 | 0.0040437 | 0.0040587 | 0.0040400 | 0.0040502 | 0.0040496 | 0.0040481 | 0.3513778 | 0.3303959 | 0.2906700 | 0.3392083 | 0.3279130 |
65% | imp5 | 0.0041922 | 0.0041956 | 0.0041963 | 0.0042075 | 0.0041998 | 0.0041979 | 0.3347321 | 0.3033991 | 0.2861939 | 0.3043782 | 0.3071758 |
65% | mean | 0.0400602 | 0.0406286 | 0.0391629 | 0.0395314 | 0.0397743 | 0.0398458 | 0.4402014 | 0.3983774 | 0.3761365 | 0.4364247 | 0.4127850 |
65% | random | 0.0533171 | 0.0806651 | 0.1361035 | 0.1684688 | 0.1284125 | 0.1096386 | 0.2796195 | 0.2432508 | 0.6848510 | 0.6998182 | 0.4768849 |
70% | ||||||||||||
70% | kalman | 0.0001618 | 0.0001265 | 0.0001262 | 0.0001725 | 0.0001417 | 0.0001468 | 0.3959994 | 0.3457826 | 0.3231221 | 0.3883061 | 0.3633026 |
70% | interpolation | 0.0001618 | 0.0001265 | 0.0001262 | 0.0001725 | 0.0001417 | 0.0001468 | 0.3927822 | 0.3453630 | 0.3225626 | 0.3881662 | 0.3622185 |
70% | ma | 0.0001928 | 0.0001537 | 0.0001548 | 0.0001982 | 0.0001689 | 0.0001749 | 0.4151630 | 0.3650860 | 0.3400476 | 0.4130648 | 0.3833403 |
70% | locf | 0.0003904 | 0.0003374 | 0.0003273 | 0.0004098 | 0.0003581 | 0.0003662 | 0.4253742 | 0.3855085 | 0.3597706 | 0.4227165 | 0.3983424 |
70% | tidyr_fill | 0.0003904 | 0.0003374 | 0.0003273 | 0.0004098 | 0.0003581 | 0.0003662 | 0.4253742 | 0.3855085 | 0.3597706 | 0.4227165 | 0.3983424 |
70% | imp2 | 0.0058556 | 0.0058747 | 0.0058369 | 0.0058674 | 0.0058597 | 0.0058586 | 0.4127850 | 0.3397678 | 0.3446636 | 0.3655057 | 0.3656805 |
70% | imp4 | 0.0062205 | 0.0061725 | 0.0061818 | 0.0061766 | 0.0061770 | 0.0061879 | 0.4052315 | 0.3361309 | 0.3291369 | 0.3536159 | 0.3560288 |
70% | imp3 | 0.0063261 | 0.0062797 | 0.0063244 | 0.0063016 | 0.0063019 | 0.0063080 | 0.4242551 | 0.3555742 | 0.3613093 | 0.3979578 | 0.3847741 |
70% | imp5 | 0.0063617 | 0.0063084 | 0.0063286 | 0.0063186 | 0.0063185 | 0.0063293 | 0.4506924 | 0.3659253 | 0.3615890 | 0.3909638 | 0.3922926 |
70% | imp1 | 0.0065354 | 0.0065133 | 0.0065083 | 0.0064779 | 0.0064999 | 0.0065087 | 0.4273325 | 0.3474612 | 0.3517975 | 0.3743181 | 0.3752273 |
70% | mean | 0.0434017 | 0.0423406 | 0.0426192 | 0.0426670 | 0.0425423 | 0.0427571 | 0.4084487 | 0.3775353 | 0.3564135 | 0.4071898 | 0.3873968 |
70% | random | 0.1551202 | 0.0523911 | 0.1117988 | 0.2454679 | 0.1365526 | 0.1411945 | 0.8742481 | 0.8708910 | 0.7637432 | 0.8887956 | 0.8494195 |
80% | ||||||||||||
80% | interpolation | 0.0002653 | 0.0002080 | 0.0002403 | 0.0002507 | 0.0002330 | 0.0002411 | 0.4865016 | 0.4063505 | 0.4062107 | 0.4692964 | 0.4420898 |
80% | kalman | 0.0002647 | 0.0002080 | 0.0002403 | 0.0002511 | 0.0002331 | 0.0002410 | 0.4876206 | 0.4055113 | 0.4049517 | 0.4662191 | 0.4410757 |
80% | ma | 0.0003359 | 0.0002728 | 0.0002943 | 0.0003105 | 0.0002925 | 0.0003034 | 0.5045461 | 0.4288712 | 0.4227165 | 0.4869212 | 0.4607637 |
80% | locf | 0.0007388 | 0.0005612 | 0.0005608 | 0.0006582 | 0.0005934 | 0.0006297 | 0.5302840 | 0.4497132 | 0.4574066 | 0.5218912 | 0.4898238 |
80% | tidyr_fill | 0.0007388 | 0.0005612 | 0.0005608 | 0.0006582 | 0.0005934 | 0.0006297 | 0.5302840 | 0.4497132 | 0.4574066 | 0.5218912 | 0.4898238 |
80% | imp5 | 0.0171595 | 0.0171508 | 0.0172147 | 0.0171742 | 0.0171799 | 0.0171748 | 0.4348860 | 0.3748776 | 0.3765562 | 0.4386628 | 0.4062456 |
80% | imp4 | 0.0178892 | 0.0178482 | 0.0178864 | 0.0178254 | 0.0178533 | 0.0178623 | 0.4648203 | 0.4056511 | 0.3899846 | 0.4616030 | 0.4305148 |
80% | imp1 | 0.0179774 | 0.0179669 | 0.0179299 | 0.0179521 | 0.0179496 | 0.0179566 | 0.4511120 | 0.3912435 | 0.3925024 | 0.4497132 | 0.4211428 |
80% | imp2 | 0.0181715 | 0.0182881 | 0.0182233 | 0.0182494 | 0.0182536 | 0.0182331 | 0.4567072 | 0.3929221 | 0.3660652 | 0.4357253 | 0.4128549 |
80% | imp3 | 0.0188257 | 0.0189006 | 0.0189500 | 0.0188341 | 0.0188949 | 0.0188776 | 0.4779689 | 0.4242551 | 0.4165618 | 0.5053854 | 0.4560428 |
80% | mean | 0.0486525 | 0.0484767 | 0.0482804 | 0.0487259 | 0.0484943 | 0.0485339 | 0.2989229 | 0.2805987 | 0.2605959 | 0.2943069 | 0.2836061 |
80% | random | 0.0967558 | 0.0565147 | 0.1428038 | 0.0580634 | 0.0857940 | 0.0885344 | 0.8461323 | 0.9290810 | 0.1155406 | 0.8341027 | 0.6812142 |
85% | ||||||||||||
85% | kalman | 0.0003567 | 0.0002414 | 0.0003399 | 0.0003848 | 0.0003220 | 0.0003307 | 0.5336411 | 0.4581060 | 0.4311092 | 0.5150371 | 0.4844734 |
85% | interpolation | 0.0003568 | 0.0002415 | 0.0003425 | 0.0003855 | 0.0003232 | 0.0003316 | 0.5390964 | 0.4617429 | 0.4374038 | 0.5209120 | 0.4897888 |
85% | ma | 0.0004343 | 0.0003234 | 0.0004471 | 0.0004450 | 0.0004052 | 0.0004124 | 0.5718282 | 0.4860820 | 0.4788082 | 0.5623164 | 0.5247587 |
85% | locf | 0.0009119 | 0.0007365 | 0.0009503 | 0.0008236 | 0.0008368 | 0.0008556 | 0.5592391 | 0.4825850 | 0.4667786 | 0.5409148 | 0.5123794 |
85% | tidyr_fill | 0.0009119 | 0.0007365 | 0.0009503 | 0.0008236 | 0.0008368 | 0.0008556 | 0.5592391 | 0.4825850 | 0.4667786 | 0.5409148 | 0.5123794 |
85% | imp2 | 0.0285124 | 0.0284944 | 0.0284871 | 0.0284606 | 0.0284807 | 0.0284886 | 0.5101413 | 0.4222968 | 0.4008952 | 0.4583858 | 0.4479298 |
85% | imp4 | 0.0295915 | 0.0292050 | 0.0294749 | 0.0293755 | 0.0293518 | 0.0294117 | 0.5662330 | 0.4571269 | 0.4434187 | 0.5016086 | 0.4920968 |
85% | imp1 | 0.0296854 | 0.0294646 | 0.0295183 | 0.0295090 | 0.0294973 | 0.0295443 | 0.5305637 | 0.4369842 | 0.4357253 | 0.5021681 | 0.4763603 |
85% | imp5 | 0.0296229 | 0.0295257 | 0.0296607 | 0.0295922 | 0.0295929 | 0.0296004 | 0.5403553 | 0.4505525 | 0.4560078 | 0.5295846 | 0.4941251 |
85% | imp3 | 0.0309386 | 0.0309092 | 0.0309865 | 0.0308879 | 0.0309279 | 0.0309305 | 0.5620366 | 0.4683172 | 0.4411806 | 0.5178347 | 0.4973423 |
85% | mean | 0.0521145 | 0.0515481 | 0.0514613 | 0.0510225 | 0.0513439 | 0.0515366 | 0.2410127 | 0.2432508 | 0.2106588 | 0.2477270 | 0.2356623 |
85% | random | 0.3240766 | 0.0639116 | 0.0581029 | 0.0510604 | 0.0576916 | 0.1242879 | 0.9664289 | 0.1843614 | 0.9805567 | 0.9148133 | 0.7615401 |
90% | ||||||||||||
90% | interpolation | 0.0004602 | 0.0005112 | 0.0004253 | 0.0005540 | 0.0004968 | 0.0004877 | 0.5963072 | 0.5367184 | 0.4974122 | 0.6212058 | 0.5629109 |
90% | kalman | 0.0004602 | 0.0005172 | 0.0004253 | 0.0005541 | 0.0004989 | 0.0004892 | 0.5978459 | 0.5383970 | 0.4965729 | 0.6207861 | 0.5634005 |
90% | ma | 0.0006192 | 0.0006425 | 0.0005668 | 0.0007449 | 0.0006514 | 0.0006434 | 0.6475031 | 0.5676318 | 0.5312631 | 0.6360330 | 0.5956078 |
90% | locf | 0.0015472 | 0.0012908 | 0.0012546 | 0.0015022 | 0.0013492 | 0.0013987 | 0.6398098 | 0.5505665 | 0.5153168 | 0.6048398 | 0.5776332 |
90% | tidyr_fill | 0.0015472 | 0.0012908 | 0.0012546 | 0.0015022 | 0.0013492 | 0.0013987 | 0.6398098 | 0.5505665 | 0.5153168 | 0.6048398 | 0.5776332 |
90% | imp5 | 0.0440889 | 0.0441617 | 0.0444101 | 0.0440200 | 0.0441973 | 0.0441702 | 0.5168555 | 0.4466359 | 0.4127850 | 0.5109806 | 0.4718142 |
90% | imp2 | 0.0444710 | 0.0444534 | 0.0444560 | 0.0445331 | 0.0444808 | 0.0444784 | 0.5337809 | 0.4397818 | 0.4294307 | 0.4981116 | 0.4752763 |
90% | imp1 | 0.0461595 | 0.0461769 | 0.0463285 | 0.0462737 | 0.0462597 | 0.0462346 | 0.5740663 | 0.4841237 | 0.4734928 | 0.5550427 | 0.5216814 |
90% | imp3 | 0.0458608 | 0.0460663 | 0.0465595 | 0.0462600 | 0.0462953 | 0.0461867 | 0.5509862 | 0.4834243 | 0.4192195 | 0.5245489 | 0.4945447 |
90% | imp4 | 0.0462646 | 0.0464083 | 0.0464868 | 0.0463752 | 0.0464234 | 0.0463837 | 0.5572807 | 0.4809064 | 0.4618828 | 0.5704294 | 0.5176248 |
90% | mean | 0.0554369 | 0.0545033 | 0.0548398 | 0.0556674 | 0.0550035 | 0.0551119 | 0.9363547 | 0.9440481 | 0.1286893 | 0.9380333 | 0.7367814 |
90% | random | 0.0716249 | 0.1366404 | 0.0568678 | 0.0790605 | 0.0908562 | 0.0860484 | 0.1349839 | 0.0870052 | 0.9282417 | 0.9373339 | 0.5218912 |
## tick-data to 1-min data.
## Stack the imputeTS, Amelia and tidyr result tables into one comparison table.
dfm2 <- list(dfm2_impTS, dfm2_amelia, dfm2_tidyr) %>% bind_rows

## Render the comparison grouped by missing rate (`.id`).
## The header-row spans are derived from the sorted data instead of being
## hard-coded as 12 rows per rate, so a missing or extra model for some rate
## cannot shift the group labels onto the wrong rows.
## `local()` keeps the helper variables out of the global environment; its
## value (the table) is still auto-printed by the chunk.
local({
  ## Sort the models within each missing rate; ddply() returns the groups as
  ## contiguous runs of rows. `as_tibble()` replaces the deprecated `tbl_df()`.
  cmp <- dfm2 %>%
    ddply(.(.id), arrange, Mean.HLC, Mean.OHLC, bias) %>%
    as_tibble()

  ## Header-row colour for each missing rate.
  css <- c(
    '1%'  = 'background-color: #e68a00; color: #fff;',
    '10%' = 'background-color: #ff0000; color: #fff;',
    '20%' = 'background-color: #bf80ff; color: #fff;',
    '30%' = 'background-color: #66ff33; color: #fff;',
    '50%' = 'background-color: #6666ff; color: #fff;',
    '65%' = 'background-color: #66e0ff; color: #fff;',
    '70%' = 'background-color:#0066ff; color: #fff;',
    '80%' = 'background-color: #ff9900; color: #fff;',
    '85%' = 'background-color: #33ff33; color: #fff;',
    '90%' = 'background-color: #339966; color: #fff;'
  )
  ## Used for a rate that has no colour assigned above
  ## (kableExtra's own default for `label_row_css`).
  css_default <- 'border-bottom: 1px solid;'

  ## First and last row of every run of identical `.id` values.
  runs <- rle(as.character(cmp$.id))
  last <- cumsum(runs$lengths)
  first <- last - runs$lengths + 1L

  out <- cmp %>%
    kable(caption = 'Bias Comparison') %>%
    kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive'))

  ## Namespace-qualified because dplyr also exports a `group_rows()`.
  for (i in seq_along(runs$values)) {
    rate <- runs$values[i]
    out <- kableExtra::group_rows(
      out, rate, first[i], last[i],
      label_row_css = if (rate %in% names(css)) css[[rate]] else css_default
    )
  }

  out %>% scroll_box(width = '100%', height = '400px')
})
.id | Model | AskOpen | AskHigh | AskLow | AskClose | Mean.HLC | Mean.OHLC | bias.open | bias.high | bias.low | bias.close | bias |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1% | ||||||||||||
1% | imp2 | 48.06482 | 48.03929 | 48.09266 | 48.06683 | 48.06626 | 48.06590 | 0.0055283 | 0.0042998 | 0.0055283 | 0.0049140 | 0.0050676 |
1% | imp5 | 48.06493 | 48.03940 | 48.09263 | 48.06691 | 48.06631 | 48.06597 | 0.0049140 | 0.0042998 | 0.0049140 | 0.0042998 | 0.0046069 |
1% | locf | 48.06471 | 48.03943 | 48.09250 | 48.06706 | 48.06633 | 48.06592 | 0.0079853 | 0.0067568 | 0.0030713 | 0.0024570 | 0.0050676 |
1% | tidyr_fill | 48.06471 | 48.03943 | 48.09250 | 48.06706 | 48.06633 | 48.06592 | 0.0079853 | 0.0067568 | 0.0030713 | 0.0024570 | 0.0050676 |
1% | imp3 | 48.06477 | 48.03933 | 48.09253 | 48.06716 | 48.06634 | 48.06595 | 0.0036855 | 0.0036855 | 0.0024570 | 0.0024570 | 0.0030713 |
1% | imp1 | 48.06482 | 48.03931 | 48.09267 | 48.06713 | 48.06637 | 48.06598 | 0.0055283 | 0.0055283 | 0.0030713 | 0.0030713 | 0.0042998 |
1% | imp4 | 48.06483 | 48.03946 | 48.09270 | 48.06697 | 48.06638 | 48.06599 | 0.0036855 | 0.0030713 | 0.0042998 | 0.0036855 | 0.0036855 |
1% | kalman | 48.06484 | 48.03953 | 48.09285 | 48.06702 | 48.06646 | 48.06606 | 0.0030713 | 0.0024570 | 0.0030713 | 0.0024570 | 0.0027641 |
1% | interpolation | 48.06478 | 48.03953 | 48.09285 | 48.06702 | 48.06647 | 48.06605 | 0.0024570 | 0.0012285 | 0.0030713 | 0.0018428 | 0.0021499 |
1% | ma | 48.06507 | 48.03947 | 48.09286 | 48.06706 | 48.06647 | 48.06612 | 0.0055283 | 0.0055283 | 0.0042998 | 0.0055283 | 0.0052211 |
1% | mean | 48.06836 | 48.03966 | 48.09137 | 48.06870 | 48.06658 | 48.06702 | 0.0165848 | 0.0184275 | 0.0128993 | 0.0159705 | 0.0159705 |
1% | random | 48.06803 | 48.03820 | 48.10323 | 48.06020 | 48.06721 | 48.06742 | 0.0221130 | 0.0178133 | 0.0233415 | 0.0196560 | 0.0207310 |
10% | ||||||||||||
10% | imp4 | 48.06482 | 48.03913 | 48.09300 | 48.06658 | 48.06624 | 48.06588 | 0.0466830 | 0.0386978 | 0.0417690 | 0.0350123 | 0.0405405 |
10% | imp2 | 48.06486 | 48.03910 | 48.09271 | 48.06702 | 48.06627 | 48.06592 | 0.0423833 | 0.0393120 | 0.0337838 | 0.0337838 | 0.0373157 |
10% | imp5 | 48.06506 | 48.03883 | 48.09322 | 48.06684 | 48.06630 | 48.06599 | 0.0565111 | 0.0466830 | 0.0448403 | 0.0399263 | 0.0469902 |
10% | imp1 | 48.06460 | 48.03961 | 48.09248 | 48.06715 | 48.06642 | 48.06596 | 0.0479115 | 0.0337838 | 0.0411548 | 0.0307125 | 0.0383907 |
10% | imp3 | 48.06492 | 48.03909 | 48.09336 | 48.06684 | 48.06643 | 48.06605 | 0.0472973 | 0.0411548 | 0.0509828 | 0.0485258 | 0.0469902 |
10% | ma | 48.06389 | 48.03796 | 48.09338 | 48.06803 | 48.06646 | 48.06581 | 0.0626536 | 0.0540541 | 0.0540541 | 0.0552826 | 0.0565111 |
10% | kalman | 48.06387 | 48.03828 | 48.09319 | 48.06821 | 48.06656 | 48.06589 | 0.0528256 | 0.0491400 | 0.0448403 | 0.0448403 | 0.0479115 |
10% | interpolation | 48.06382 | 48.03829 | 48.09319 | 48.06827 | 48.06658 | 48.06589 | 0.0546683 | 0.0485258 | 0.0454545 | 0.0423833 | 0.0477580 |
10% | locf | 48.06388 | 48.04010 | 48.09277 | 48.06879 | 48.06722 | 48.06639 | 0.0589681 | 0.0485258 | 0.0558968 | 0.0472973 | 0.0526720 |
10% | tidyr_fill | 48.06388 | 48.04010 | 48.09277 | 48.06879 | 48.06722 | 48.06639 | 0.0589681 | 0.0485258 | 0.0558968 | 0.0472973 | 0.0526720 |
10% | mean | 48.07145 | 48.03286 | 48.10848 | 48.06614 | 48.06916 | 48.06973 | 0.1726044 | 0.1603194 | 0.1572482 | 0.1664619 | 0.1641585 |
10% | random | 47.91482 | 48.20938 | 48.19080 | 48.04397 | 48.14805 | 48.08974 | 0.1848894 | 0.1025799 | 0.2291155 | 0.1633907 | 0.1699939 |
20% | ||||||||||||
20% | random | 48.45310 | 48.01260 | 48.05931 | 47.86129 | 47.97773 | 48.09657 | 0.3366093 | 0.3218673 | 0.3126536 | 0.3359951 | 0.3267813 |
20% | imp4 | 48.06378 | 48.03896 | 48.09348 | 48.06752 | 48.06665 | 48.06593 | 0.0921376 | 0.0804668 | 0.0878378 | 0.0853808 | 0.0864558 |
20% | ma | 48.06454 | 48.03813 | 48.09355 | 48.06860 | 48.06676 | 48.06620 | 0.1087224 | 0.1068796 | 0.0939803 | 0.1081081 | 0.1044226 |
20% | imp2 | 48.06493 | 48.03925 | 48.09323 | 48.06796 | 48.06682 | 48.06635 | 0.0921376 | 0.0884521 | 0.0823096 | 0.0902948 | 0.0882985 |
20% | imp3 | 48.06402 | 48.04021 | 48.09313 | 48.06711 | 48.06682 | 48.06612 | 0.0902948 | 0.0743243 | 0.0939803 | 0.0859951 | 0.0861486 |
20% | locf | 48.06368 | 48.03972 | 48.09354 | 48.06721 | 48.06683 | 48.06604 | 0.1068796 | 0.0976658 | 0.0970516 | 0.0952088 | 0.0992015 |
20% | tidyr_fill | 48.06368 | 48.03972 | 48.09354 | 48.06721 | 48.06683 | 48.06604 | 0.1068796 | 0.0976658 | 0.0970516 | 0.0952088 | 0.0992015 |
20% | imp5 | 48.06468 | 48.03944 | 48.09354 | 48.06753 | 48.06684 | 48.06630 | 0.1007371 | 0.0786241 | 0.0853808 | 0.0687961 | 0.0833845 |
20% | imp1 | 48.06460 | 48.03932 | 48.09376 | 48.06750 | 48.06686 | 48.06629 | 0.1056511 | 0.0970516 | 0.0970516 | 0.0982801 | 0.0995086 |
20% | kalman | 48.06438 | 48.03886 | 48.09359 | 48.06910 | 48.06718 | 48.06648 | 0.0927518 | 0.0945946 | 0.0816953 | 0.0933661 | 0.0906020 |
20% | interpolation | 48.06431 | 48.03889 | 48.09359 | 48.06921 | 48.06723 | 48.06650 | 0.0872236 | 0.0896806 | 0.0755528 | 0.0853808 | 0.0844595 |
20% | mean | 48.07973 | 48.02832 | 48.08681 | 48.08731 | 48.06748 | 48.07054 | 0.2782555 | 0.2721130 | 0.2395577 | 0.2800983 | 0.2675061 |
30% | ||||||||||||
30% | random | 48.25972 | 48.06207 | 48.33286 | 47.77778 | 48.05757 | 48.10811 | 0.5165848 | 0.4864865 | 0.5558968 | 0.5423833 | 0.5253378 |
30% | imp2 | 48.06313 | 48.03819 | 48.09304 | 48.06734 | 48.06619 | 48.06543 | 0.1412776 | 0.1332924 | 0.1025799 | 0.1068796 | 0.1210074 |
30% | imp3 | 48.06348 | 48.03940 | 48.09352 | 48.06649 | 48.06647 | 48.06572 | 0.1308354 | 0.1210074 | 0.1081081 | 0.1093366 | 0.1173219 |
30% | imp5 | 48.06414 | 48.03950 | 48.09303 | 48.06767 | 48.06673 | 48.06608 | 0.1400491 | 0.1265356 | 0.1056511 | 0.1068796 | 0.1197789 |
30% | imp1 | 48.06165 | 48.03872 | 48.09386 | 48.06788 | 48.06682 | 48.06553 | 0.1425061 | 0.1314496 | 0.1173219 | 0.1216216 | 0.1282248 |
30% | interpolation | 48.06474 | 48.03922 | 48.09377 | 48.06792 | 48.06697 | 48.06641 | 0.1461916 | 0.1351351 | 0.1332924 | 0.1461916 | 0.1402027 |
30% | kalman | 48.06447 | 48.03942 | 48.09377 | 48.06791 | 48.06703 | 48.06639 | 0.1529484 | 0.1418919 | 0.1363636 | 0.1541769 | 0.1463452 |
30% | imp4 | 48.06310 | 48.04006 | 48.09321 | 48.06804 | 48.06711 | 48.06610 | 0.1351351 | 0.1283784 | 0.1062654 | 0.1142506 | 0.1210074 |
30% | ma | 48.06320 | 48.04018 | 48.09408 | 48.06792 | 48.06739 | 48.06635 | 0.1566339 | 0.1345209 | 0.1547912 | 0.1689189 | 0.1537162 |
30% | locf | 48.06791 | 48.04236 | 48.09580 | 48.06738 | 48.06851 | 48.06836 | 0.1621622 | 0.1461916 | 0.1541769 | 0.1658477 | 0.1570946 |
30% | tidyr_fill | 48.06791 | 48.04236 | 48.09580 | 48.06738 | 48.06851 | 48.06836 | 0.1621622 | 0.1461916 | 0.1541769 | 0.1658477 | 0.1570946 |
30% | mean | 48.07771 | 48.04385 | 48.10906 | 48.09428 | 48.08240 | 48.08122 | 0.3642506 | 0.3507371 | 0.3224816 | 0.3869779 | 0.3561118 |
50% | ||||||||||||
50% | random | 48.61875 | 48.18569 | 47.83183 | 47.39597 | 47.80450 | 48.00806 | 0.6044226 | 0.5761671 | 0.5909091 | 0.6173219 | 0.5972052 |
50% | locf | 48.06433 | 48.03798 | 48.09349 | 48.06476 | 48.06541 | 48.06514 | 0.2807125 | 0.2444717 | 0.2242015 | 0.2585995 | 0.2519963 |
50% | tidyr_fill | 48.06433 | 48.03798 | 48.09349 | 48.06476 | 48.06541 | 48.06514 | 0.2807125 | 0.2444717 | 0.2242015 | 0.2585995 | 0.2519963 |
50% | kalman | 48.06676 | 48.03676 | 48.09347 | 48.06627 | 48.06550 | 48.06581 | 0.2340295 | 0.2168305 | 0.1848894 | 0.2315725 | 0.2168305 |
50% | interpolation | 48.06646 | 48.03692 | 48.09347 | 48.06623 | 48.06554 | 48.06577 | 0.2223587 | 0.2094595 | 0.1799754 | 0.2260442 | 0.2094595 |
50% | ma | 48.06724 | 48.03628 | 48.09353 | 48.06685 | 48.06555 | 48.06597 | 0.2469287 | 0.2266585 | 0.1996314 | 0.2432432 | 0.2291155 |
50% | imp5 | 48.06570 | 48.04092 | 48.09609 | 48.06943 | 48.06881 | 48.06803 | 0.2506143 | 0.2346437 | 0.2223587 | 0.2297297 | 0.2343366 |
50% | imp3 | 48.06598 | 48.04386 | 48.09902 | 48.06832 | 48.07040 | 48.06930 | 0.2156020 | 0.1971744 | 0.2082310 | 0.2100737 | 0.2077703 |
50% | imp2 | 48.06819 | 48.04509 | 48.09811 | 48.07183 | 48.07168 | 48.07080 | 0.2340295 | 0.2235872 | 0.2094595 | 0.2260442 | 0.2232801 |
50% | imp4 | 48.06758 | 48.04450 | 48.09766 | 48.07323 | 48.07180 | 48.07074 | 0.2143735 | 0.2039312 | 0.1947174 | 0.2063882 | 0.2048526 |
50% | imp1 | 48.06908 | 48.04628 | 48.10127 | 48.07136 | 48.07297 | 48.07200 | 0.2192875 | 0.1934889 | 0.2039312 | 0.2027027 | 0.2048526 |
50% | mean | 48.10011 | 48.05789 | 48.10226 | 48.06800 | 48.07605 | 48.08206 | 0.4484029 | 0.3900491 | 0.3832924 | 0.4318182 | 0.4133907 |
65% | ||||||||||||
65% | imp3 | 48.06148 | 48.03451 | 48.08888 | 48.06000 | 48.06113 | 48.06122 | 0.3132678 | 0.2727273 | 0.2880835 | 0.2788698 | 0.2882371 |
65% | imp5 | 48.06606 | 48.03924 | 48.08798 | 48.06334 | 48.06352 | 48.06416 | 0.3335381 | 0.2899263 | 0.2506143 | 0.2555283 | 0.2824017 |
65% | locf | 48.06478 | 48.04620 | 48.08847 | 48.06434 | 48.06634 | 48.06595 | 0.3544226 | 0.3138821 | 0.2942260 | 0.3445946 | 0.3267813 |
65% | tidyr_fill | 48.06478 | 48.04620 | 48.08847 | 48.06434 | 48.06634 | 48.06595 | 0.3544226 | 0.3138821 | 0.2942260 | 0.3445946 | 0.3267813 |
65% | imp2 | 48.06781 | 48.03969 | 48.09423 | 48.06629 | 48.06673 | 48.06700 | 0.3347666 | 0.2819410 | 0.2813268 | 0.2647420 | 0.2906941 |
65% | imp1 | 48.06753 | 48.03945 | 48.09411 | 48.06812 | 48.06723 | 48.06730 | 0.3519656 | 0.3028256 | 0.2714988 | 0.2542998 | 0.2951474 |
65% | imp4 | 48.06864 | 48.03979 | 48.09638 | 48.06667 | 48.06761 | 48.06787 | 0.3464373 | 0.2929975 | 0.3015971 | 0.2745700 | 0.3039005 |
65% | ma | 48.07108 | 48.04394 | 48.09041 | 48.07027 | 48.06821 | 48.06892 | 0.3353808 | 0.2764128 | 0.2647420 | 0.3101966 | 0.2966830 |
65% | kalman | 48.07038 | 48.04468 | 48.08991 | 48.07061 | 48.06840 | 48.06890 | 0.3114251 | 0.2684275 | 0.2450860 | 0.2911548 | 0.2790233 |
65% | interpolation | 48.07036 | 48.04468 | 48.08991 | 48.07116 | 48.06858 | 48.06903 | 0.3095823 | 0.2659705 | 0.2457002 | 0.2948403 | 0.2790233 |
65% | mean | 48.06675 | 48.02981 | 48.12859 | 48.11165 | 48.09002 | 48.08420 | 0.4367322 | 0.3814496 | 0.3783784 | 0.4176904 | 0.4035627 |
65% | random | 47.50568 | 48.49940 | 49.01626 | 47.85631 | 48.45733 | 48.21942 | 0.8832924 | 0.6977887 | 0.9195332 | 0.8298526 | 0.8326167 |
70% | ||||||||||||
70% | mean | 48.12663 | 48.00807 | 48.08176 | 48.06426 | 48.05137 | 48.07018 | 0.8335381 | 0.7665848 | 0.3243243 | 0.3863636 | 0.5777027 |
70% | imp3 | 48.05517 | 48.02155 | 48.08947 | 48.06446 | 48.05849 | 48.05766 | 0.3187961 | 0.3175676 | 0.2911548 | 0.3402948 | 0.3169533 |
70% | locf | 48.05579 | 48.03106 | 48.07434 | 48.08234 | 48.06258 | 48.06088 | 0.4029484 | 0.4004914 | 0.3003686 | 0.4054054 | 0.3773034 |
70% | tidyr_fill | 48.05579 | 48.03106 | 48.07434 | 48.08234 | 48.06258 | 48.06088 | 0.4029484 | 0.4004914 | 0.3003686 | 0.4054054 | 0.3773034 |
70% | imp4 | 48.05450 | 48.03275 | 48.09509 | 48.06249 | 48.06344 | 48.06121 | 0.3243243 | 0.2628993 | 0.2807125 | 0.2585995 | 0.2816339 |
70% | ma | 48.05462 | 48.02955 | 48.08596 | 48.07753 | 48.06435 | 48.06191 | 0.3617936 | 0.3445946 | 0.2856265 | 0.3703931 | 0.3406020 |
70% | kalman | 48.05440 | 48.02955 | 48.08650 | 48.07724 | 48.06443 | 48.06192 | 0.3415233 | 0.3212531 | 0.2653563 | 0.3445946 | 0.3181818 |
70% | interpolation | 48.05448 | 48.02946 | 48.08666 | 48.07724 | 48.06445 | 48.06196 | 0.3421376 | 0.3243243 | 0.2616708 | 0.3452088 | 0.3183354 |
70% | imp1 | 48.06299 | 48.03345 | 48.09202 | 48.06911 | 48.06486 | 48.06439 | 0.3378378 | 0.3015971 | 0.2702703 | 0.2850123 | 0.2986794 |
70% | imp2 | 48.06721 | 48.03401 | 48.10299 | 48.07360 | 48.07020 | 48.06945 | 0.3310811 | 0.3065111 | 0.2788698 | 0.3003686 | 0.3042076 |
70% | imp5 | 48.06417 | 48.04374 | 48.09782 | 48.08140 | 48.07432 | 48.07178 | 0.2997543 | 0.2972973 | 0.2899263 | 0.3230958 | 0.3025184 |
70% | random | 47.68556 | 47.12402 | 47.95719 | 49.26794 | 48.11638 | 48.00868 | 0.9324324 | 0.9262899 | 0.9146192 | 0.9275184 | 0.9252150 |
80% | ||||||||||||
80% | locf | 48.07033 | 48.03224 | 48.09007 | 48.06271 | 48.06167 | 48.06384 | 0.4932432 | 0.4404177 | 0.3912776 | 0.4551597 | 0.4450246 |
80% | tidyr_fill | 48.07033 | 48.03224 | 48.09007 | 48.06271 | 48.06167 | 48.06384 | 0.4932432 | 0.4404177 | 0.3912776 | 0.4551597 | 0.4450246 |
80% | imp3 | 48.06543 | 48.03745 | 48.09131 | 48.05879 | 48.06252 | 48.06325 | 0.4778870 | 0.3384521 | 0.3673219 | 0.2868550 | 0.3676290 |
80% | imp2 | 48.09635 | 48.03502 | 48.09513 | 48.06611 | 48.06542 | 48.07315 | 0.4809582 | 0.4379607 | 0.3550369 | 0.3574939 | 0.4078624 |
80% | ma | 48.06795 | 48.04561 | 48.09361 | 48.06843 | 48.06922 | 48.06890 | 0.4398034 | 0.3808354 | 0.3863636 | 0.4391892 | 0.4115479 |
80% | interpolation | 48.06756 | 48.04682 | 48.09353 | 48.06823 | 48.06952 | 48.06903 | 0.4250614 | 0.3525799 | 0.3789926 | 0.4244472 | 0.3952703 |
80% | kalman | 48.06761 | 48.04682 | 48.09357 | 48.06821 | 48.06953 | 48.06905 | 0.4256757 | 0.3519656 | 0.3820639 | 0.4256757 | 0.3963452 |
80% | imp4 | 48.07939 | 48.04066 | 48.10873 | 48.06645 | 48.07195 | 48.07381 | 0.4662162 | 0.3765356 | 0.4041769 | 0.4011057 | 0.4120086 |
80% | imp5 | 48.08769 | 48.05208 | 48.11440 | 48.07758 | 48.08135 | 48.08294 | 0.3980344 | 0.3470516 | 0.3200246 | 0.3304668 | 0.3488943 |
80% | imp1 | 48.09547 | 48.06079 | 48.12047 | 48.08773 | 48.08966 | 48.09112 | 0.3599509 | 0.3396806 | 0.3065111 | 0.3476658 | 0.3384521 |
80% | mean | 48.05587 | 48.05459 | 48.17520 | 48.12306 | 48.11762 | 48.10218 | 0.8802211 | 0.2708845 | 0.8157248 | 0.2954545 | 0.5655713 |
80% | random | 49.44640 | 48.28223 | 49.41269 | 47.95915 | 48.55136 | 48.77512 | 0.9686732 | 0.9705160 | 0.9017199 | 0.9133907 | 0.9385749 |
85% | ||||||||||||
85% | locf | 48.06139 | 48.04274 | 48.09568 | 48.04604 | 48.06149 | 48.06146 | 0.5479115 | 0.4570025 | 0.5012285 | 0.5491400 | 0.5138206 |
85% | tidyr_fill | 48.06139 | 48.04274 | 48.09568 | 48.04604 | 48.06149 | 48.06146 | 0.5479115 | 0.4570025 | 0.5012285 | 0.5491400 | 0.5138206 |
85% | kalman | 48.05593 | 48.04052 | 48.08479 | 48.06086 | 48.06206 | 48.06052 | 0.4404177 | 0.3593366 | 0.3875921 | 0.4484029 | 0.4089373 |
85% | interpolation | 48.05584 | 48.04052 | 48.08479 | 48.06113 | 48.06214 | 48.06057 | 0.4410319 | 0.3660934 | 0.3900491 | 0.4606880 | 0.4144656 |
85% | ma | 48.05618 | 48.04110 | 48.08498 | 48.06091 | 48.06233 | 48.06079 | 0.5116708 | 0.4183047 | 0.4551597 | 0.5098280 | 0.4737408 |
85% | imp5 | 48.05452 | 48.04422 | 48.10806 | 48.09895 | 48.08375 | 48.07644 | 0.4299754 | 0.4103194 | 0.3869779 | 0.4434889 | 0.4176904 |
85% | imp3 | 48.06908 | 48.06095 | 48.10012 | 48.10192 | 48.08767 | 48.08302 | 0.3568796 | 0.3396806 | 0.3218673 | 0.3488943 | 0.3418305 |
85% | imp4 | 48.07092 | 48.06230 | 48.11810 | 48.08630 | 48.08890 | 48.08441 | 0.4459459 | 0.3200246 | 0.3495086 | 0.2696560 | 0.3462838 |
85% | imp2 | 48.07333 | 48.06250 | 48.11384 | 48.10463 | 48.09366 | 48.08858 | 0.3562654 | 0.3427518 | 0.3200246 | 0.3839066 | 0.3507371 |
85% | imp1 | 48.06422 | 48.06042 | 48.11247 | 48.11366 | 48.09552 | 48.08769 | 0.3765356 | 0.3568796 | 0.3427518 | 0.3820639 | 0.3645577 |
85% | mean | 48.11939 | 48.09357 | 48.10504 | 48.10364 | 48.10075 | 48.10541 | 0.2260442 | 0.1984029 | 0.2125307 | 0.2278870 | 0.2162162 |
85% | random | 49.15168 | 47.72769 | 49.75232 | 47.47577 | 48.31860 | 48.52687 | 0.9895577 | 0.9889435 | 0.9631450 | 0.9649877 | 0.9766585 |
90% | ||||||||||||
90% | random | 48.11770 | 47.55666 | 47.88066 | 47.62295 | 47.68676 | 47.79449 | 0.9410319 | 0.9367322 | 0.9613022 | 0.9637592 | 0.9507064 |
90% | locf | 48.09342 | 48.02838 | 48.08001 | 48.02318 | 48.04386 | 48.05625 | 0.5368550 | 0.5018428 | 0.5122850 | 0.6375921 | 0.5471437 |
90% | tidyr_fill | 48.09342 | 48.02838 | 48.08001 | 48.02318 | 48.04386 | 48.05625 | 0.5368550 | 0.5018428 | 0.5122850 | 0.6375921 | 0.5471437 |
90% | imp5 | 48.06921 | 48.04484 | 48.06735 | 48.03865 | 48.05028 | 48.05501 | 0.4299754 | 0.2929975 | 0.3108108 | 0.3882064 | 0.3554975 |
90% | imp3 | 48.07160 | 48.04393 | 48.06767 | 48.04916 | 48.05359 | 48.05809 | 0.3452088 | 0.3132678 | 0.2463145 | 0.3003686 | 0.3012899 |
90% | imp1 | 48.08072 | 48.03486 | 48.07313 | 48.05499 | 48.05433 | 48.06093 | 0.4189189 | 0.3409091 | 0.3114251 | 0.3544226 | 0.3564189 |
90% | interpolation | 48.09238 | 48.02948 | 48.08101 | 48.06488 | 48.05846 | 48.06694 | 0.5270270 | 0.5165848 | 0.4600737 | 0.6339066 | 0.5343980 |
90% | kalman | 48.09188 | 48.03086 | 48.08101 | 48.06581 | 48.05923 | 48.06739 | 0.5221130 | 0.5079853 | 0.4459459 | 0.6216216 | 0.5244165 |
90% | ma | 48.09193 | 48.03144 | 48.08093 | 48.06593 | 48.05943 | 48.06756 | 0.5737101 | 0.5110565 | 0.4944717 | 0.6228501 | 0.5505221 |
90% | imp4 | 48.10228 | 48.04703 | 48.07421 | 48.07541 | 48.06555 | 48.07473 | 0.4576167 | 0.3851351 | 0.2585995 | 0.3538084 | 0.3637899 |
90% | imp2 | 48.10489 | 48.05921 | 48.08469 | 48.07178 | 48.07189 | 48.08014 | 0.4600737 | 0.3789926 | 0.2911548 | 0.3568796 | 0.3717752 |
90% | mean | 48.03712 | 48.06868 | 48.06397 | 48.17040 | 48.10102 | 48.08504 | 0.1572482 | 0.1547912 | 0.1480344 | 0.1658477 | 0.1564803 |
以下乃日间数据,虽然与此文献的日内数据有所分别,但是也可作为参考。
## List the USDJPY result files (pred2, DCC, aDCC HLC and DCC OHLC variants).
fls <- list.files(
  'data/fx/USDJPY',
  pattern = 'pred2.+.rds|^DCC.GARCH.USDJPY.HLC.[0-9]{4}-[0-9]{2}-[0-9]{2}.rds|^aDCC.GARCH.USDJPY.HLC.[0-9]{4}-[0-9]{2}-[0-9]{2}.rds|DCC.GARCH.USDJPY.OHLC.[0-9]{4}-[0-9]{2}-[0-9]{2}.rds')
## Pull the yyyy-mm-dd date out of every file name and count how many
## files carry each date (columns `x` = date, `freq` = number of files).
validate <- str_extract_all(fls, '[0-9]{4}-[0-9]{2}-[0-9]{2}')
validate <- tbl_df(plyr::count(unlist(validate)))
## Render the dates whose file count differs from 5 as a scrollable table.
validate %>%
  dplyr::filter(freq != 5) %>%
  kable(caption = 'Count missing observation') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(height = '400px')
x | freq |
---|---|
2012-12-31 | 1 |
2013-02-05 | 3 |
2013-02-21 | 4 |
2013-02-25 | 3 |
2013-03-18 | 4 |
2013-03-20 | 3 |
2013-04-10 | 4 |
2013-04-14 | 3 |
2013-04-16 | 4 |
2013-04-18 | 4 |
2013-04-24 | 3 |
2014-10-23 | 4 |
2014-10-28 | 3 |
2014-12-15 | 3 |
2014-12-25 | 4 |
2014-12-29 | 3 |
2015-02-13 | 4 |
2015-02-17 | 3 |
2017-04-18 | 4 |
2017-04-20 | 1 |
2017-05-15 | 4 |
2017-05-17 | 1 |
2017-05-18 | 1 |
2017-08-27 | 4 |
2017-08-29 | 3 |
## Univariate
## Dates for which exactly 5 result files were counted.
td <- validate %>%
  dplyr::filter(freq == 5) %>%
  .$x %>%
  ymd
## Names of the files whose name contains one of those dates.
flv <- llply(td, grep, fls, value = TRUE) %>%
  unlist %>%
  unique

## Get only MSE and AIC/BIC but omit VaR.
## Every file is read, `USDJPY` in the column names is replaced by `Price`,
## `Type` is split into `Cat`, `Type` and `Model`, and the result is reshaped
## according to the number of columns found at that point. The per-file data
## frames are row-bound by ldply().
MSE.com <- ldply(flv, function(x) {
  dfm <- readRDS(paste0('data/fx/USDJPY/', x))
  ## When the stored object is not a data frame, use its `res` element.
  if (!is.data.frame(dfm)) {
    dfm %<>% .$res
  }
  names(dfm) %<>% str_replace_all('USDJPY', 'Price')
  dfm %<>% separate(Type, c('Cat', 'Type', 'Model'))
  n_col <- ncol(dfm)

  if (n_col == 10) {
    ## Rows are keyed by `Type` (Op/Hi/Lo/Cl): spread the price and the
    ## T+1 price into one column per type, then label as gjrGARCH / OHLC.
    dfm %<>%
      dplyr::filter(Type %in% c('Op', 'Hi', 'Lo', 'Cl')) %>%
      mutate(Type2 = Type) %>%
      spread(Type, Price) %>%
      dplyr::rename(Price.Open = Op, Price.High = Hi,
                    Price.Low = Lo, Price.Close = Cl) %>%
      spread(Type2, `Price.T+1`) %>%
      dplyr::rename(Price.Open.T1 = Op, Price.High.T1 = Hi,
                    Price.Low.T1 = Lo, Price.Close.T1 = Cl) %>%
      dplyr::select(Date, Price.Open, Price.High, Price.Low, Price.Close,
                    Price.Open.T1, Price.High.T1, Price.Low.T1,
                    Price.Close.T1,
                    Akaike, Bayes, Shibata, Hannan.Quinn)
    dfm %<>% mutate(Model = 'gjrGARCH', Cat = 'OHLC')
  } else if (n_col == 14) {
    ## Wide layout without an open price: keep the HLC columns only.
    dfm %<>%
      dplyr::select(Date, Model, Price.High, Price.Low, Price.Close,
                    Price.High.T1, Price.Low.T1, Price.Close.T1,
                    Akaike, Bayes, Shibata, Hannan.Quinn) %>%
      unique
    dfm %<>% mutate(Cat = 'HLC')
  } else if (n_col == 16) {
    ## Wide layout with an open price: the information criteria are
    ## replaced by their mean before duplicate rows are dropped.
    dfm %<>%
      dplyr::select(Date, Model,
                    Price.Open, Price.High, Price.Low, Price.Close,
                    Price.Open.T1, Price.High.T1, Price.Low.T1,
                    Price.Close.T1,
                    Akaike, Bayes, Shibata, Hannan.Quinn) %>%
      mutate(Akaike = mean(Akaike), Bayes = mean(Bayes),
             Shibata = mean(Shibata),
             Hannan.Quinn = mean(Hannan.Quinn)) %>%
      unique
    dfm %<>% mutate(Cat = 'OHLC')
  }
  ## Any other column count is passed through unchanged. (Piping the data
  ## frame into itself would be evaluated as a call to a function named
  ## `dfm` and fail.)
  dfm
}) %>% tbl_df
## Filter bias.
## Keep the identifying columns and the four T+1 prices of every model.
bias <- MSE.com %>%
  mutate(Model = factor(Model)) %>%
  dplyr::select(Date, Model, Cat,
                Price.Open.T1, Price.High.T1, Price.Low.T1, Price.Close.T1)

## The gjrGARCH rows are handled separately from the other models.
bias1 <- bias %>% dplyr::filter(Model != 'gjrGARCH')
bias2 <- bias %>% dplyr::filter(Model == 'gjrGARCH')

## Unique (Date, Model, Cat) keys of the gjrGARCH rows.
bias2A <- bias2[c(1:3)] %>% unique

## Collapse the gjrGARCH rows: sum the four T+1 columns of each row
## (ignoring NA), then refill the sums row-wise into a 4-column matrix.
## NOTE(review): labelling V1..V4 as Open/High/Low/Close assumes the rows
## arrive in that order within each date -- TODO confirm against the
## spread() output produced when MSE.com is built.
bias2 <- bias2[-c(1:3)] %>%
  rowSums(na.rm = TRUE) %>%
  matrix(ncol = 4, byrow = TRUE) %>%
  as_data_frame %>%
  dplyr::rename(Price.Open.T1 = V1, Price.High.T1 = V2,
                Price.Low.T1 = V3, Price.Close.T1 = V4)
bias2 <- cbind(bias2A, bias2) %>% tbl_df

## Recombine all models, order by date and drop the temporaries.
bias <- rbind(bias1, bias2) %>% tbl_df %>% arrange(Date)
rm(bias1, bias2A, bias2)
## Flag (1/0) every T+1 price that breaks the OHLC ordering:
##   open / close outside the [low, high] range,
##   high below any other price, low above any other price.
bias <- bias %>%
  mutate(
    bias.open = if_else(
      Price.Open.T1 > Price.High.T1 | Price.Open.T1 < Price.Low.T1,
      1, 0),
    bias.high = if_else(
      Price.High.T1 < Price.Open.T1 | Price.High.T1 < Price.Low.T1 |
        Price.High.T1 < Price.Close.T1,
      1, 0),
    bias.low = if_else(
      Price.Low.T1 > Price.Open.T1 | Price.Low.T1 > Price.High.T1 |
        Price.Low.T1 > Price.Close.T1,
      1, 0),
    bias.close = if_else(
      Price.Close.T1 > Price.High.T1 | Price.Close.T1 < Price.Low.T1,
      1, 0)
  ) %>%
  dplyr::select(Date, Model, Cat,
                Price.Open.T1, Price.High.T1, Price.Low.T1, Price.Close.T1,
                bias.open, bias.high, bias.low, bias.close)
## Optional: keep only the rows with at least one flag set.
# bias %<>% dplyr::filter(bias.open==1|bias.high==1|bias.low==1|bias.close==1)
bias
## # A tibble: 5,955 x 11
## Date Model Cat Price.Open.T1 Price.High.T1 Price.Low.T1
## <date> <fct> <chr> <dbl> <dbl> <dbl>
## 1 2013-01-01 aDCC HLC NA 86.8 86.8
## 2 2013-01-01 aDCC OHLC 86.5 86.8 86.8
## 3 2013-01-01 DCC HLC NA 86.8 86.8
## 4 2013-01-01 DCC OHLC 86.5 86.8 86.8
## 5 2013-01-01 gjrG~ OHLC 86.8 86.8 86.5
## 6 2013-01-02 aDCC HLC NA 87.3 86.5
## 7 2013-01-02 aDCC OHLC 86.7 87.3 86.5
## 8 2013-01-02 DCC HLC NA 87.3 86.5
## 9 2013-01-02 DCC OHLC 86.7 87.3 86.5
## 10 2013-01-02 gjrG~ OHLC 86.5 87.3 86.7
## # ... with 5,945 more rows, and 5 more variables: Price.Close.T1 <dbl>,
## # bias.open <dbl>, bias.high <dbl>, bias.low <dbl>, bias.close <dbl>
## Share of flagged rows per (Model, Cat): NA flags are left out of the
## numerator but still counted in the denominator. `bias` is the mean of
## the four shares and `n` the number of rows in the group.
ddply(
  bias, .(Model, Cat), summarise,
  bias.open = sum(bias.open, na.rm = TRUE) / length(bias.open),
  bias.high = sum(bias.high, na.rm = TRUE) / length(bias.high),
  bias.low = sum(bias.low, na.rm = TRUE) / length(bias.low),
  bias.close = sum(bias.close, na.rm = TRUE) / length(bias.close),
  bias = (bias.open + bias.high + bias.low + bias.close) / 4,
  n = length(Cat)
) %>%
  kable(caption = 'Bias Dataset') %>%
  kable_styling(
    bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')
  ) %>%
  scroll_box(width = '100%')  # height = '400px' left disabled
Model | Cat | bias.open | bias.high | bias.low | bias.close | bias | n |
---|---|---|---|---|---|---|---|
aDCC | HLC | 0.0000000 | 0.1192275 | 0.1032746 | 0.2157851 | 0.1095718 | 1191 |
aDCC | OHLC | 0.1897565 | 0.1326616 | 0.1141898 | 0.2149454 | 0.1628883 | 1191 |
DCC | HLC | 0.0000000 | 0.1183879 | 0.1032746 | 0.2149454 | 0.1091520 | 1191 |
DCC | OHLC | 0.1897565 | 0.1326616 | 0.1150294 | 0.2174643 | 0.1637280 | 1191 |
gjrGARCH | OHLC | 0.9756507 | 0.9076406 | 0.9378673 | 0.9269521 | 0.9370277 | 1191 |
弥补来的数据以以下标准衡量:

- `MSE.HLC`(倘若是交易的话,一些模型不包括开市价)
- `MSE.OHLC`(一些交易模型会拿开市价与上一个闭市价进行比较)
- `bias`(误差与偏差,例如开闭市价都必须在最高低价之间,否则就是弥补偏差)

从以上数据证明,弥补来的数据确实有误,如之前单变量的误差(开市或闭市价高于最高价、低于最低价)。僕们可以通过`auto.arima`、`ETS`、`GARCH`或者其它方式回测数据和弥补缺失值,不过那就比较费时了。根据以上的`imputeTS::na.seadec()`弥补来的数据(参阅binary.com 面试试题 I - 单变量数据缺失值管理),`algorithm = 'kalman'`或`algorithm = 'interpolation'`俩的误差率最低。
综合数据偏差比较中证明当`1-min`数据缺失值 \(\leq\) `30%`的时候,`amelia`多变量最为精准;然后当`50%` \(\leq\) 缺失值 \(\leq\) `65%`时,MSE和偏差(bias)俩都不及单变量`sea.dec`。与此同时,`tick to 1-min`数据证明`sea.dec`乃最佳弥补数据的模型。
此文献的结论乃无法断定单变量或多变量比较适合弥补数据缺失值,不过有一点可以断定的是无论缺失值多寡,`sea.dec(algorithm = 'interpolation')`和`sea.dec(algorithm = 'kalman')`都将会比较有效弥补数据。最有效的方法可能是使用多变量`DCC`模式(预测)<sup>3</sup>弥补缺失值,不过会非常耗时,而且该GARCH模型乃预测价格模型。
以下乃此文献资讯:
Category | session_info | Category | Sys.info |
---|---|---|---|
version | R version 3.5.1 (2018-07-02) | sysname | Windows |
os | Windows 10 x64 | release | 10 x64 |
system | x86_64, mingw32 | version | build 17134 |
ui | RTerm | nodename | RSTUDIO-SCIBROK |
language | en | machine | x86-64 |
collate | Japanese_Japan.932 | login | scibr |
ctype | Japanese_Japan.932 | user | scibr |
tz | Asia/Tokyo | effective_user | scibr |
date | 2018-10-25 | Current time | 2018-10-25 23:59:22 JST |
- `na.locf` by factors in data frame split by country

Powered by - Copyright® Intellectual Property Rights of ®個人の経営企業
欲知更多详情,请查阅binary.com Interview Question I - Interday High Frequency Trading Models Comparison。↩
欲知更多详情,请参阅一、什么是Tick Data。↩
binary.com Interview Question I - Multivariate GARCH Models中的多变量模型将会计算不同价格中的关系系数。↩