# test_mregn_20180212 by LangYuan
# 1.加入了收益均值((C-O)/O)和K线长度均值(tick表示,C-O)双指标
# 2.以日内新高代替了原来的N根bar新高,其中每天开盘后前6根bar的新高不计,即从第7根开始计算
# 3.编写新的数据预处理程序,使得可以生成前5分钟K线的后m分钟和下一根5分钟K线的前n分钟的回归数据(1<=m,n<=5)
# Start the wall-clock timer for the whole run
t_start <- Sys.time()
library(xlsx)
## Loading required package: rJava
## Loading required package: xlsxjars
library(stringr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Set the working directory; every relative path below is resolved against it
setwd("C:\\Users\\Administrator\\Documents\\实习\\测试01_5min最后一根1min与下一根")
#filename_1min = "JT888_1min.csv"
#filename_5min = "JT888_5min.csv"
# For each input folder: list the file names it contains, then prefix them
# with the folder to obtain relative paths. The three listings are paired
# by position later on, so files are matched purely by their sort order.
filenames_5min <- list.files("data_5min")
filepath_5min <- paste0("./data_5min/", filenames_5min)
filenames_1min <- list.files("data_1min")
filepath_1min <- paste0("./data_1min/", filenames_1min)
filenames_regr <- list.files("data_regr")
filepath_regr <- paste0("./data_regr/", filenames_regr)
# The number of 1-minute files determines how many instruments are processed
looplen <- length(filenames_1min)
# Empty data frame that accumulates one result row per instrument
test.data <- data.frame()
# ---- Helpers used by the per-instrument loop --------------------------------

# Flag the bars that set a new intraday high / low.
#   high, low   : bar highs and lows, in time order
#   trade_date  : trading-day label of every bar (same length as high/low)
#   warmup_bars : number of opening bars of each day on which nothing is
#                 flagged; flags can be raised from bar warmup_bars + 1 on
# Returns list(high = <logical>, low = <logical>), one element per bar.
flag_intraday_extremes <- function(high, low, trade_date, warmup_bars = 6) {
  n_bars <- length(high)
  is_new_high <- logical(n_bars)
  is_new_low <- logical(n_bars)
  # Nothing to compare with fewer than two bars (also avoids 2:1 counting down)
  if (n_bars < 2) {
    return(list(high = is_new_high, low = is_new_low))
  }
  day_high <- high[1]
  day_low <- low[1]
  bars_today <- 1
  for (k in 2:n_bars) {
    if (trade_date[k] != trade_date[k - 1]) {
      # First bar of a new trading day: restart the reference levels
      day_high <- high[k]
      day_low <- low[k]
      bars_today <- 1
    } else {
      bars_today <- bars_today + 1
      # NOTE(review): the reference levels are only updated once the warm-up
      # is over, so after the warm-up a bar is compared against the day's
      # FIRST bar rather than against the extreme of all earlier bars of the
      # day. Kept as in the original - confirm whether that is intended.
      if (bars_today > warmup_bars) {
        if (high[k] > day_high) {
          day_high <- high[k]
          is_new_high[k] <- TRUE
        }
        # Fixed: the original compared low[k] with itself, which is never
        # TRUE, so no new low was ever flagged.
        if (low[k] < day_low) {
          day_low <- low[k]
          is_new_low[k] <- TRUE
        }
      }
    }
  }
  list(high = is_new_high, low = is_new_low)
}

# t statistic of the slope of a one-regressor lm fit. The coefficient matrix
# of summary.lm is 2 x 4 (Estimate, Std. Error, t value, Pr(>|t|)), so the
# column-major element 6 is row 2 / column 3. Yields NA when the slope could
# not be estimated (the matrix then has a single row).
slope_t_value <- function(fit) {
  summary(fit)$coefficients[6]
}

# ---- Settings shared by all instruments --------------------------------------
alp <- 0.9           # quantile level that defines a "big" 5-minute candle
newhigh_len <- 5     # window of the disabled rolling N-bar variant below
newhigh_warmup <- 6  # opening bars per day excluded from new-high/low flags

# ---- Process every instrument -------------------------------------------------
# seq_len() makes the loop a no-op when no input files were found
for (iii in seq_len(looplen)) {
  # Load the 1-minute bars, the 5-minute bars and the pre-built regression
  # data set of the iii-th instrument
  kdata_1min <- read.csv(filepath_1min[iii])
  kdata_5min <- read.csv(filepath_5min[iii])
  kdata_proc <- read.csv(filepath_regr[iii])

  # Work around a small data glitch: overwrite the trading date of the first
  # 5-minute bar with that of the second
  kdata_5min$plotdate.td[1] <- kdata_5min$plotdate.td[2]

  # Columns needed below ("_last" / "_next" come from the regression file)
  openpri_mlast <- kdata_proc$开盘价_last
  closepri_mlast <- kdata_proc$收盘价_last
  openpri_nnext <- kdata_proc$开盘价_next
  closepri_nnext <- kdata_proc$收盘价_next

  openpri_5min <- kdata_5min$开盘价
  highpri_5min <- kdata_5min$最高价
  lowpri_5min <- kdata_5min$最低价
  closepri_5min <- kdata_5min$收盘价
  truedate_5min <- as.vector(kdata_5min$plotdate.td)
  data_len_5min <- length(openpri_5min)

  openpri_1min <- kdata_1min$开盘价
  closepri_1min <- kdata_1min$收盘价
  truedate_1min <- as.vector(kdata_1min$plotdate.td)
  data_len_1min <- length(openpri_1min)

  # Candle body length (close - open) and candle return ((close - open) / open)
  candle_len_1min <- closepri_1min - openpri_1min
  candle_len_next <- closepri_nnext - openpri_nnext
  candle_return_1min <- candle_len_1min / openpri_1min
  candle_return_5min <- (closepri_5min - openpri_5min) / openpri_5min
  candle_return_last <- (closepri_mlast - openpri_mlast) / openpri_mlast
  candle_return_next <- candle_len_next / openpri_nnext

  # TRUE where a bar and its successor share the same trading date.
  # NOTE(review): the shifted vector is padded with TWO zeros, so the
  # second-to-last bar is excluded as well as the last one. Kept as in the
  # original - confirm whether dropping that extra bar is intended.
  flag_daysep_1min <- truedate_1min ==
    c(truedate_1min[2:(length(truedate_1min) - 1)], 0, 0)
  flag_daysep_5min <- truedate_5min ==
    c(truedate_5min[2:(length(truedate_5min) - 1)], 0, 0)

  # --- Regression 0: every 1-minute bar against the following 1-minute bar ---
  # The following bar's return / length is sign-flipped when the preceding
  # bar fell, and regressed on the preceding bar's absolute return.
  # head(x, -1) replaces x[1:n-1], which parsed as x[(1:n) - 1] and only
  # worked because index 0 is silently dropped.
  flag_yin_1min_next <- c(FALSE, head(candle_return_1min < 0, -1))
  flag_yin_next_mul <- ifelse(flag_yin_1min_next, -1, 1)
  candle_return_1min_abs <- abs(candle_return_1min)
  candle_return_1min_adjust <- candle_return_1min * flag_yin_next_mul
  candle_len_1min_adjust <- candle_len_1min * flag_yin_next_mul
  # Bars whose predecessor belongs to the same trading date
  flag_has_prev_1min <- c(FALSE, head(flag_daysep_1min, -1))
  last_normal_return <- candle_return_1min_abs[flag_daysep_1min]
  next_normal_return <- candle_return_1min_adjust[flag_has_prev_1min]
  next_normal_len <- candle_len_1min_adjust[flag_has_prev_1min]
  mean_normal_return <- mean(next_normal_return, na.rm = TRUE)
  mean_normal_len <- mean(next_normal_len, na.rm = TRUE)
  reg00 <- lm(next_normal_return ~ last_normal_return)

  # --- Regression 1: "_last" segment against "_next" segment, all 5-min bars ---
  flag_yin <- candle_return_last < 0
  flag_yin_mul <- ifelse(flag_yin, -1, 1)
  candle_return_last_abs <- abs(candle_return_last)
  candle_return_next_adjust <- candle_return_next * flag_yin_mul
  candle_len_next_adjust <- candle_len_next * flag_yin_mul
  last_51_return <- candle_return_last_abs[flag_daysep_5min]
  next_51_return <- candle_return_next_adjust[flag_daysep_5min]
  next_51_len <- candle_len_next_adjust[flag_daysep_5min]
  mean_51_return <- mean(next_51_return, na.rm = TRUE)
  mean_51_len <- mean(next_51_len, na.rm = TRUE)
  reg01 <- lm(next_51_return ~ last_51_return, na.action = na.exclude)

  # --- Regression 2: same, restricted to big 5-minute candles ---
  # "Big" means an absolute 5-minute return above its alp-quantile
  candle_return_5min_abs <- abs(candle_return_5min)
  threshold_5min <- quantile(candle_return_5min_abs, probs = alp)
  print(sprintf('%s threshold=%f', filenames_1min[iii], threshold_5min))
  flag_bigk_5min <- (candle_return_5min_abs > threshold_5min) & flag_daysep_5min
  last_51bigk_return <- candle_return_last_abs[flag_bigk_5min]
  next_51bigk_return <- candle_return_next_adjust[flag_bigk_5min]
  next_51bigk_len <- candle_len_next_adjust[flag_bigk_5min]
  mean_51bigk_return <- mean(next_51bigk_return, na.rm = TRUE)
  mean_51bigk_len <- mean(next_51bigk_len, na.rm = TRUE)
  reg02 <- lm(next_51bigk_return ~ last_51bigk_return, na.action = na.exclude)

  # --- Regression 3: 5-minute bars that set a new intraday high or low ---
  daysep_flag <- flag_daysep_5min
  # Disabled earlier variant (rolling N-bar high/low), kept for reference:
  # k_num=newhigh_len
  # temp_flag=which(!daysep_flag)
  # for (kk in 1:k_num-1){
  # daysep_flag[temp_flag+kk]=FALSE
  # }
  # daysep_flag[1:k_num]=FALSE
  # daysep_flag=daysep_flag[1:data_len_5min]
  # hh=c(rep(-99999999,k_num-1),rollapply(highpri_5min,k_num,max))
  # ll=c(rep(-99999999,k_num-1),rollapply(lowpri_5min,k_num,min))
  # newhigh_flag=which((highpri_5min==hh)&daysep_flag)
  # newlow_flag=which((lowpri_5min==ll)&daysep_flag)
  extremes <- flag_intraday_extremes(
    highpri_5min, lowpri_5min, truedate_5min,
    warmup_bars = newhigh_warmup
  )
  newhigh_flag <- extremes$high & daysep_flag
  newlow_flag <- extremes$low & daysep_flag
  # New-low observations enter with flipped sign so that both cases are
  # pooled into a single sample
  last_newhigh_return <- candle_return_last_abs[newhigh_flag]
  last_newlow_return <- candle_return_last_abs[newlow_flag]
  last_newhighlow_return <- c(last_newhigh_return, last_newlow_return)
  next_newhigh_return <- candle_return_next[newhigh_flag]
  next_newlow_return <- -candle_return_next[newlow_flag]
  next_newhighlow_return <- c(next_newhigh_return, next_newlow_return)
  next_newhigh_len <- candle_len_next[newhigh_flag]
  next_newlow_len <- -candle_len_next[newlow_flag]
  next_newhighlow_len <- c(next_newhigh_len, next_newlow_len)
  mean_newhighlow_return <- mean(next_newhighlow_return, na.rm = TRUE)
  mean_newhighlow_len <- mean(next_newhighlow_len, na.rm = TRUE)
  reg03 <- lm(next_newhighlow_return ~ last_newhighlow_return)

  # --- One result row for this instrument ---
  # The instrument label is the first two characters of the 1-minute file name
  test.newdata <- data.frame(
    "标的名称" = substr(filenames_1min[iii], 1, 2),
    "普通K线回归t值" = slope_t_value(reg00),
    "普通K线次bar均值" = mean_normal_return,
    "普通K线次bar长度(tick)" = mean_normal_len,
    #"普通K线回归系数" = summary(reg00)$coefficients[2],
    "51K线回归t值" = slope_t_value(reg01),
    "51K线次bar均值" = mean_51_return,
    "51K线次bar长度(tick)" = mean_51_len,
    #"51K线回归系数"=summary(reg01)$coefficients[2],
    "大k线51回归t值" = slope_t_value(reg02),
    "大k线51次bar均值" = mean_51bigk_return,
    "大k线51次bar长度(tick)" = mean_51bigk_len,
    #"大k线51回归系数"=summary(reg02)$coefficients[2],
    "新高K线回归t值" = slope_t_value(reg03),
    "新高k线次bar均值" = mean_newhighlow_return,
    "新高k线次bar长度(tick)" = mean_newhighlow_len,
    #"新高k线回归系数"=summary(reg03)$coefficients[2],
    "大K线阈值" = threshold_5min,
    stringsAsFactors = FALSE
  )
  # Append the row to the accumulated results (one row per file, so the
  # incremental rbind stays cheap)
  test.data <- rbind(test.data, test.newdata)
}
## [1] "a9888_1min.csv threshold=0.001662"
## [1] "ag888_1min.csv threshold=0.001319"
## [1] "al888_1min.csv threshold=0.001507"
## [1] "au888_1min.csv threshold=0.000884"
## [1] "bu888_1min.csv threshold=0.003065"
## [1] "c9888_1min.csv threshold=0.001294"
## [1] "cf888_1min.csv threshold=0.001822"
## [1] "cs888_1min.csv threshold=0.002311"
## [1] "cu888_1min.csv threshold=0.001826"
## [1] "fg888_1min.csv threshold=0.002796"
## [1] "hc888_1min.csv threshold=0.002860"
## [1] "i9888_1min.csv threshold=0.004043"
## [1] "j9888_1min.csv threshold=0.003096"
## [1] "jd888_1min.csv threshold=0.002283"
## [1] "jm888_1min.csv threshold=0.003750"
## [1] "l9888_1min.csv threshold=0.002825"
## [1] "m9888_1min.csv threshold=0.001817"
## [1] "ma888_1min.csv threshold=0.002740"
## [1] "ni888_1min.csv threshold=0.002381"
## [1] "oi888_1min.csv threshold=0.001651"
## [1] "p9888_1min.csv threshold=0.002093"
## [1] "pb888_1min.csv threshold=0.001868"
## [1] "pp888_1min.csv threshold=0.002877"
## [1] "rb888_1min.csv threshold=0.002392"
## [1] "rm888_1min.csv threshold=0.002231"
## [1] "ru888_1min.csv threshold=0.003449"
## [1] "sn888_1min.csv threshold=0.001833"
## [1] "sr888_1min.csv threshold=0.001747"
## [1] "t9888_1min.csv threshold=0.000493"
## [1] "ta888_1min.csv threshold=0.002208"
## [1] "tf888_1min.csv threshold=0.000363"
## [1] "v9888_1min.csv threshold=0.002295"
## [1] "y9888_1min.csv threshold=0.001801"
## [1] "zc888_1min.csv threshold=0.002609"
## [1] "zn888_1min.csv threshold=0.002138"
# Export the accumulated result table to an Excel workbook
write.xlsx(test.data, file = "test_result.xlsx")
# Report the elapsed wall-clock time of the run
t_end <- Sys.time()
difftime(t_end, t_start)
## Time difference of 4.297012 mins