# test_mregn_20180212 by LangYuan
# 1.加入了收益均值((C-O)/O)和K线长度均值(tick表示,C-O)双指标
# 2.以日内新高代替了原来的N根bar新高,其中每天开盘后前6根bar的新高不计,即从第7根开始计算
# 3.编写新的数据预处理程序,使得可以生成前5分钟K线的后m分钟和下一根5分钟K线的前n分钟的回归数据(1<=m,n<=5)

# Record the start time so the total run time can be reported at the end
t_start <- Sys.time()

# Packages attached by the script (the `##` lines are the console output that
# was captured when they were loaded)
library(xlsx)
## Loading required package: rJava
## Loading required package: xlsxjars
library(stringr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

# All relative paths below are resolved against this working directory
setwd("C:\\Users\\Administrator\\Documents\\实习\\测试01_5min最后一根1min与下一根")

# File names found in each of the three input folders.
# Single-file examples kept from the original:
#   filename_1min = "JT888_1min.csv"
#   filename_5min = "JT888_5min.csv"
filenames_1min <- list.files("data_1min")
filenames_5min <- list.files("data_5min")
filenames_regr <- list.files("data_regr")

# Relative path of every file, built by prefixing the folder name.
# NOTE(review): the main loop indexes these three vectors with the same
# position, so the folders are presumably expected to hold matching files in
# the same order; nothing here verifies that.
filepath_1min <- paste0("./data_1min/", filenames_1min)
filepath_5min <- paste0("./data_5min/", filenames_5min)
filepath_regr <- paste0("./data_regr/", filenames_regr)

# Number of instruments to process (taken from the 1-minute folder) and the
# initially empty table that collects one result row per instrument
looplen <- length(filenames_1min)
test.data <- data.frame()


# Main per-instrument loop.
# For each file index iii this reads the 1-minute bars, the 5-minute bars and
# the pre-processed regression table (filepath_1min / filepath_5min /
# filepath_regr at the same position), fits four simple regressions of a
# sign-adjusted "next" return on the absolute "last" return, and appends one
# summary row to test.data:
#   reg00 - every 1-minute bar vs. the 1-minute bar that follows it
#   reg01 - every row of the regression table (*_last vs. *_next columns)
#   reg02 - as reg01, restricted to 5-minute bars whose absolute return
#           exceeds the alp quantile
#   reg03 - as reg01, restricted to 5-minute bars flagged as a new intraday
#           high or low (flags start at the 7th bar of each trading day)
# seq_len() keeps the body from running at all when no input files were found.
for (iii in seq_len(looplen)) {
  # Read the three input tables for this instrument
  kdata_1min <- read.csv(filepath_1min[iii])
  kdata_5min <- read.csv(filepath_5min[iii])
  kdata_proc <- read.csv(filepath_regr[iii])

  alp <- 0.9        # quantile level for the "big candle" threshold
  newhigh_len <- 5  # window of the former N-bar new-high rule; only referenced
                    # by the commented-out code further down

  # Workaround kept from the original: overwrite the first row's trading date
  # with the second row's (the underlying data problem is not visible here)
  kdata_5min$plotdate.td[1] <- kdata_5min$plotdate.td[2]

  # ---- Column extraction ---------------------------------------------------
  # "*_last" columns of the regression table
  # begtime_1min_last = kdata_proc$时间_last
  openpri_mlast <- kdata_proc$开盘价_last
  highpri_mlast <- kdata_proc$最高价_last
  lowpri_mlast <- kdata_proc$最低价_last
  closepri_mlast <- kdata_proc$收盘价_last
  volamo_mlast <- kdata_proc$成交量_last
  storeamo_mlast <- kdata_proc$持仓量_last
  truedate_mlast <- as.vector(kdata_proc$plotdate.td)

  # "*_next" columns of the regression table (same plotdate.td column as above)
  # begtime_1min_next = kdata_proc$时间_next
  openpri_nnext <- kdata_proc$开盘价_next
  highpri_nnext <- kdata_proc$最高价_next
  lowpri_nnext <- kdata_proc$最低价_next
  closepri_nnext <- kdata_proc$收盘价_next
  volamo_nnext <- kdata_proc$成交量_next
  storeamo_nnext <- kdata_proc$持仓量_next
  truedate_nnext <- as.vector(kdata_proc$plotdate.td)
  state_flag <- kdata_proc$state_flag

  # 5-minute bars
  begtime_5min <- kdata_5min$X..时间
  openpri_5min <- kdata_5min$开盘价
  highpri_5min <- kdata_5min$最高价
  lowpri_5min <- kdata_5min$最低价
  closepri_5min <- kdata_5min$收盘价
  volamo_5min <- kdata_5min$成交量
  storeamo_5min <- kdata_5min$持仓量
  truedate_5min <- as.vector(kdata_5min$plotdate.td)
  begtime_5min <- as.character(begtime_5min)
  data_len_5min <- length(openpri_5min)

  # 1-minute bars
  begtime_1min <- kdata_1min$X..时间
  openpri_1min <- kdata_1min$开盘价
  highpri_1min <- kdata_1min$最高价
  lowpri_1min <- kdata_1min$最低价
  closepri_1min <- kdata_1min$收盘价
  volamo_1min <- kdata_1min$成交量
  storeamo_1min <- kdata_1min$持仓量
  truedate_1min <- as.vector(kdata_1min$plotdate.td)
  begtime_1min <- as.character(begtime_1min)
  data_len_1min <- length(openpri_1min)

  # ---- Candle body length (close - open) and return ((close - open) / open) -
  candle_len_1min <- closepri_1min - openpri_1min
  candle_len_5min <- closepri_5min - openpri_5min
  candle_len_last <- closepri_mlast - openpri_mlast
  candle_len_next <- closepri_nnext - openpri_nnext
  candle_return_1min <- (closepri_1min - openpri_1min) / openpri_1min
  candle_return_5min <- (closepri_5min - openpri_5min) / openpri_5min
  candle_return_last <- (closepri_mlast - openpri_mlast) / openpri_mlast
  candle_return_next <- (closepri_nnext - openpri_nnext) / openpri_nnext

  # ---- Same-trading-day mask -------------------------------------------------
  # TRUE where a bar and the bar after it carry the same trading date. The
  # shifted vector is padded with two 0s, so the last two positions compare
  # against 0 and come out FALSE as long as 0 is never a real date value.
  flag_daysep_1min <- truedate_1min ==
    c(truedate_1min[2:(length(truedate_1min) - 1)], 0, 0)
  flag_daysep_5min <- truedate_5min ==
    c(truedate_5min[2:(length(truedate_5min) - 1)], 0, 0)

  # ---- reg00: every 1-minute bar vs. the following 1-minute bar -------------
  data_len_1min <- length(openpri_1min)
  # Position k is TRUE when bar k-1 closed below its open; head(x, -1) drops
  # the last element, giving the one-step lag
  flag_yin_1min_next <- c(FALSE, head(candle_return_1min < 0, -1))
  # Sign multiplier: flip the following bar's return when the previous bar fell
  flag_yin_next_mul <- ifelse(flag_yin_1min_next, -1, 1)
  candle_return_1min_abs <- abs(candle_return_1min)
  candle_return_1min_adjust <- candle_return_1min * flag_yin_next_mul
  candle_len_1min_adjust <- candle_len_1min * flag_yin_next_mul
  # Same-day mask shifted by one so it selects the "next" bar of each pair
  flag_daysep_1min_next <- c(FALSE, head(flag_daysep_1min, -1))
  last_normal_return <- candle_return_1min_abs[flag_daysep_1min]
  next_normal_return <- candle_return_1min_adjust[flag_daysep_1min_next]
  next_normal_len <- candle_len_1min_adjust[flag_daysep_1min_next]
  mean_normal_return <- mean(next_normal_return, na.rm = TRUE)
  mean_normal_len <- mean(next_normal_len, na.rm = TRUE)
  reg00 <- lm(next_normal_return ~ last_normal_return)

  # ---- reg01: all rows of the regression table ------------------------------
  # NOTE(review): the 5-minute masks are applied directly to kdata_proc
  # columns, so kdata_proc presumably has one row per 5-minute bar in the same
  # order - confirm against the preprocessing script.
  data_len_5min <- length(openpri_5min)
  flag_yin <- candle_return_last < 0
  flag_yin_mul <- ifelse(flag_yin, -1, 1)
  candle_return_last_abs <- abs(candle_return_last)
  candle_return_next_adjust <- candle_return_next * flag_yin_mul
  candle_len_next_adjust <- candle_len_next * flag_yin_mul
  last_51_return <- candle_return_last_abs[flag_daysep_5min]
  next_51_return <- candle_return_next_adjust[flag_daysep_5min]
  next_51_len <- candle_len_next_adjust[flag_daysep_5min]
  mean_51_return <- mean(next_51_return, na.rm = TRUE)
  mean_51_len <- mean(next_51_len, na.rm = TRUE)
  reg01 <- lm(next_51_return ~ last_51_return, na.action = na.exclude)

  # ---- reg02: only "big" 5-minute candles -----------------------------------
  # A 5-minute bar is big when its absolute return is above the alp quantile
  candle_return_5min_abs <- abs(candle_return_5min)
  threshold_5min <- quantile(candle_return_5min_abs, probs = alp)
  print(sprintf('%s threshold=%f', filenames_1min[iii], threshold_5min))  # report the threshold
  flag_bigk_5min <- (candle_return_5min_abs > threshold_5min) & flag_daysep_5min
  last_51bigk_return <- candle_return_last_abs[flag_bigk_5min]
  next_51bigk_return <- candle_return_next_adjust[flag_bigk_5min]
  next_51bigk_len <- candle_len_next_adjust[flag_bigk_5min]
  mean_51bigk_return <- mean(next_51bigk_return, na.rm = TRUE)
  mean_51bigk_len <- mean(next_51bigk_len, na.rm = TRUE)
  reg02 <- lm(next_51bigk_return ~ last_51bigk_return, na.action = na.exclude)

  # ---- reg03: 5-minute bars that set a new intraday high / low --------------
  daysep_flag <- flag_daysep_5min
  # Former rolling N-bar new-high/new-low rule, kept for reference:
  # k_num=newhigh_len
  # temp_flag=which(!daysep_flag)
  # for (kk in 1:k_num-1){
  # daysep_flag[temp_flag+kk]=FALSE
  # }
  # daysep_flag[1:k_num]=FALSE
  # daysep_flag=daysep_flag[1:data_len_5min]
  # hh=c(rep(-99999999,k_num-1),rollapply(highpri_5min,k_num,max))
  # ll=c(rep(-99999999,k_num-1),rollapply(lowpri_5min,k_num,min))
  # newhigh_flag=which((highpri_5min==hh)&daysep_flag)
  # newlow_flag=which((lowpri_5min==ll)&daysep_flag)
  flag_daynewhigh <- logical(data_len_5min)
  flag_daynewlow <- logical(data_len_5min)
  # Running intraday extremes, seeded from the first bar; count_temp is the
  # 1-based position of the current bar within its trading day
  highpri_temp <- highpri_5min[1]
  lowpri_temp <- lowpri_5min[1]
  count_temp <- 1
  # seq_len(n)[-1] is 2..n and is empty (instead of c(2, 1)) when n < 2
  for (jjj in seq_len(data_len_5min)[-1]) {
    if (truedate_5min[jjj] != truedate_5min[jjj - 1]) {
      # First bar of a new trading day: restart the running extremes
      highpri_temp <- highpri_5min[jjj]
      lowpri_temp <- lowpri_5min[jjj]
      count_temp <- 1
    } else {
      count_temp <- count_temp + 1
      # Only bars from the 7th of the day onwards can be flagged.
      # NOTE(review): the running extremes are not updated while
      # count_temp <= 6, so bars 2-6 of a day never raise/lower them;
      # confirm that this is the intended definition of "intraday new high".
      if (count_temp > 6) {
        if (highpri_5min[jjj] > highpri_temp) {
          highpri_temp <- highpri_5min[jjj]
          flag_daynewhigh[jjj] <- TRUE
        }
        # Compare against the running low. The previous code compared
        # lowpri_5min[jjj] with itself, which is never TRUE, so no new low was
        # ever flagged.
        if (lowpri_5min[jjj] < lowpri_temp) {
          lowpri_temp <- lowpri_5min[jjj]
          flag_daynewlow[jjj] <- TRUE
        }
      }
    }
  }
  newhigh_flag <- flag_daynewhigh & daysep_flag
  newlow_flag <- flag_daynewlow & daysep_flag
  last_newhigh_return <- candle_return_last_abs[newhigh_flag]
  last_newlow_return <- candle_return_last_abs[newlow_flag]
  last_newhighlow_return <- c(last_newhigh_return, last_newlow_return)
  # "Next" values after a new low are negated before being pooled with the
  # new-high ones
  next_newhigh_return <- candle_return_next[newhigh_flag]
  next_newlow_return <- -candle_return_next[newlow_flag]
  next_newhighlow_return <- c(next_newhigh_return, next_newlow_return)
  next_newhigh_len <- candle_len_next[newhigh_flag]
  next_newlow_len <- -candle_len_next[newlow_flag]
  next_newhighlow_len <- c(next_newhigh_len, next_newlow_len)
  mean_newhighlow_return <- mean(next_newhighlow_return, na.rm = TRUE)
  mean_newhighlow_len <- mean(next_newhighlow_len, na.rm = TRUE)
  reg03 <- lm(next_newhighlow_return ~ last_newhighlow_return)

  # ---- One result row for this instrument -----------------------------------
  # summary(reg)$coefficients is indexed as a plain vector: with an intercept
  # and one slope both estimated, element 6 is the slope's t value (element 2,
  # the slope estimate, is what the disabled coefficient columns would use).
  test.newdata <- data.frame(
    "标的名称" = substr(filenames_1min[iii], 1, 2),
    "普通K线回归t值" = summary(reg00)$coefficients[6],
    "普通K线次bar均值" = mean_normal_return,
    "普通K线次bar长度(tick)" = mean_normal_len,
    "51K线回归t值" = summary(reg01)$coefficients[6],
    "51K线次bar均值" = mean_51_return,
    "51K线次bar长度(tick)" = mean_51_len,
    "大k线51回归t值" = summary(reg02)$coefficients[6],
    "大k线51次bar均值" = mean_51bigk_return,
    "大k线51次bar长度(tick)" = mean_51bigk_len,
    "新高K线回归t值" = summary(reg03)$coefficients[6],
    "新高k线次bar均值" = mean_newhighlow_return,
    "新高k线次bar长度(tick)" = mean_newhighlow_len,
    "大K线阈值" = threshold_5min,
    stringsAsFactors = FALSE
  )
  # Append this instrument's row to the accumulated results
  test.data <- rbind(test.data, test.newdata)
}
## [1] "a9888_1min.csv threshold=0.001662"
## [1] "ag888_1min.csv threshold=0.001319"
## [1] "al888_1min.csv threshold=0.001507"
## [1] "au888_1min.csv threshold=0.000884"
## [1] "bu888_1min.csv threshold=0.003065"
## [1] "c9888_1min.csv threshold=0.001294"
## [1] "cf888_1min.csv threshold=0.001822"
## [1] "cs888_1min.csv threshold=0.002311"
## [1] "cu888_1min.csv threshold=0.001826"
## [1] "fg888_1min.csv threshold=0.002796"
## [1] "hc888_1min.csv threshold=0.002860"
## [1] "i9888_1min.csv threshold=0.004043"
## [1] "j9888_1min.csv threshold=0.003096"
## [1] "jd888_1min.csv threshold=0.002283"
## [1] "jm888_1min.csv threshold=0.003750"
## [1] "l9888_1min.csv threshold=0.002825"
## [1] "m9888_1min.csv threshold=0.001817"
## [1] "ma888_1min.csv threshold=0.002740"
## [1] "ni888_1min.csv threshold=0.002381"
## [1] "oi888_1min.csv threshold=0.001651"
## [1] "p9888_1min.csv threshold=0.002093"
## [1] "pb888_1min.csv threshold=0.001868"
## [1] "pp888_1min.csv threshold=0.002877"
## [1] "rb888_1min.csv threshold=0.002392"
## [1] "rm888_1min.csv threshold=0.002231"
## [1] "ru888_1min.csv threshold=0.003449"
## [1] "sn888_1min.csv threshold=0.001833"
## [1] "sr888_1min.csv threshold=0.001747"
## [1] "t9888_1min.csv threshold=0.000493"
## [1] "ta888_1min.csv threshold=0.002208"
## [1] "tf888_1min.csv threshold=0.000363"
## [1] "v9888_1min.csv threshold=0.002295"
## [1] "y9888_1min.csv threshold=0.001801"
## [1] "zc888_1min.csv threshold=0.002609"
## [1] "zn888_1min.csv threshold=0.002138"
# Save the accumulated per-instrument results as an Excel workbook
write.xlsx(x = test.data, file = "test_result.xlsx")

# Report how long the whole script took
t_end <- Sys.time()
t_end - t_start
## Time difference of 4.297012 mins