library(quantmod)
## Warning: package 'quantmod' was built under R version 3.2.5
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.2.5
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.2.5
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.2.5
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Download the 2330.TW price history; auto.assign = FALSE makes getSymbols()
# return the series so it can be bound to a name of our choosing.
stockdf <- getSymbols("2330.TW", auto.assign = FALSE)
##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 128346 != reported length 200
# lag: put each close (column 4) next to the previous row's close
head(cbind(stockdf[, 4], lag(stockdf[, 4], 1)))
##            X2330.TW.Close X2330.TW.Close.1
## 2007-01-02        67.3632               NA
## 2007-01-03        67.6617          67.3632
## 2007-01-04        67.3632          67.6617
## 2007-01-05        66.5673          67.3632
## 2007-01-08        65.4727          66.5673
## 2007-01-09        65.6717          65.4727
# daily return: change from the previous row's close, relative to that close
daily_return <- (stockdf[, 4] - lag(stockdf[, 4], 1)) / lag(stockdf[, 4], 1)

# Simple return of one price column over a horizon of `days` rows.
#
# Args:
#   stockdf: price series supporting `[, col]` and `lag(x, k)`; this script
#            passes the object returned by getSymbols().
#   days:    a single number of rows to shift by.
#            days > 0  -> return of the current row relative to the value
#                         `days` rows earlier (backward-looking).
#            days <= 0 -> return from the current row to the value `-days`
#                         rows later (forward-looking; 0 gives all zeros).
#            The shift counts rows of the series (trading days in the data
#            used here), not calendar days.
#   col:     column holding the price; defaults to 4, the Close column of
#            the getSymbols() output used in this script.
#
# Returns:
#   A one-column series of returns; rows with no counterpart `days` rows
#   away are NA.
getReturn <- function(stockdf, days = 1, col = 4) {
  # `days` drives a scalar `if`, so reject vectors and NA up front.
  stopifnot(is.numeric(days), length(days) == 1L, !is.na(days))

  price <- stockdf[, col]
  shifted <- lag(price, days)

  if (days > 0) {
    # Backward-looking: the shifted (earlier) price is the base.
    (price - shifted) / shifted
  } else {
    # Forward-looking: the current price is the base.
    (shifted - price) / price
  }
}

# NOTE: despite its name, daily_return now holds the 2-row forward return
# (days = -2); weekly_return looks back 7 rows.
daily_return <- getReturn(stockdf, -2)
weekly_return <- getReturn(stockdf, 7)
# Close, the next row's close, and the forward return side by side
head(cbind(stockdf[, 4], lag(stockdf[, 4], -1), daily_return))
##            X2330.TW.Close X2330.TW.Close.1 X2330.TW.Close.2
## 2007-01-02        67.3632          67.6617      0.000000000
## 2007-01-03        67.6617          67.3632     -0.016174586
## 2007-01-04        67.3632          66.5673     -0.028064284
## 2007-01-05        66.5673          65.4727     -0.013454053
## 2007-01-08        65.4727          65.6717     -0.024317005
## 2007-01-09        65.6717          63.8806     -0.003030225
# getReturn(stockdf, 7):  return relative to the close 7 rows (trading days) earlier
# getReturn(stockdf, -2): return from the current close to the close 2 rows (trading days) later
library(jiebaR)
## Warning: package 'jiebaR' was built under R version 3.2.5
## Loading required package: jiebaRD
## Warning: package 'jiebaRD' was built under R version 3.2.5
# Single jiebaR segmentation worker, reused by every segment() call below.
mixseg <- worker()
news <- read.csv("moneydj201415_big5.csv", header = TRUE)

# Type conversion: make sure the text columns are plain character vectors
news$Content <- as.character(news$Content)
news$DateTime <- as.character(news$DateTime)
news$Title <- as.character(news$Title)

# Add Date: the part of DateTime before the first space.
# lapply() makes this a list column holding one value per row.
news$Date <- lapply(news$DateTime, function(e) strsplit(e, " ")[[1]][1])

# Distinct article bodies that mention "激勵"
news.content <- news[grepl("激勵", news$Content), "Content"]
news.content <- unique(news.content)

# Segment each article into words with jiebaR. The worker created above is
# reused; the earlier second worker() call was bound to a misspelled name
# and never used.
content.all <- lapply(
  news.content,
  function(e) segment(code = e, jiebar = mixseg)
)


# Build Document Term Matrix
library(tm)
## Loading required package: NLP
# Tokenizer callback: segment the first element of `d` with the shared
# `mixseg` worker and flatten the result into a single vector.
jieba_tokenizer <- function(d) {
  tokens <- segment(d[[1]], mixseg)
  unlist(tokens)
}

# Tokenizer callback: split the first element of `x` on runs of whitespace
# and return all pieces as one character vector.
space_tokenizer <- function(x) {
  text <- as.character(x[[1]])
  pieces <- strsplit(text, "[[:space:]]+")
  unlist(pieces)
}

# Wrap the segmented articles in a corpus and run the jieba tokenizer over it
doc <- VCorpus(VectorSource(content.all))
doc <- unlist(tm_map(doc, jieba_tokenizer), recursive = FALSE)
# Re-join each document's tokens with single spaces so that the
# whitespace tokenizer can split them again when the matrix is built
doc <- lapply(doc, function(d) paste(d, collapse = " "))
# wordLengths = c(2, Inf): only terms of two or more characters are kept
control.list <- list(wordLengths = c(2, Inf), tokenize = space_tokenizer)
dtm <- DocumentTermMatrix(Corpus(VectorSource(doc)), control = control.list)
# Terms with a frequency of at least 20
findFreqTerms(dtm, 20)
##   [1] "10"       "11"       "12"       "13"       "14"       "15"      
##   [7] "16"       "17"       "18"       "19"       "20"       "2012"    
##  [13] "2013"     "2014"     "2015"     "21"       "22"       "24"      
##  [19] "25"       "26"       "27"       "28"       "30"       "35"      
##  [25] "40"       "50"       "500"      "eps"      "ic"       "iphone"  
##  [31] "led"      "lineups"  "moneydj"  "mosfet"   "nb"       "on"      
##  [37] "pc"       "q1"       "q2"       "q3"       "q4"       "ups"     
##  [43] "一年"     "一度"     "一個"     "二成"     "入帳"     "力道"    
##  [49] "上半年"   "上市"     "上揚"     "上游"     "上漲"     "上緯"    
##  [55] "下半年"   "下跌"     "下滑"     "也將"     "土地"     "大田"    
##  [61] "大陸"     "大幅"     "大增"     "大廠"     "大關"     "子公司"  
##  [67] "小幅"     "工作"     "工業"     "已經"     "不如"     "不過"    
##  [73] "中心"     "中信金"   "中國"     "中碳"     "中鋼"     "五成"    
##  [79] "今年"     "公司"     "公布"     "公噸"     "分別"     "分析"    
##  [85] "分析師"   "切入"     "化工"     "升至"     "升溫"     "反彈"    
##  [91] "天數"     "太陽能"   "巴西"     "手機"     "方面"     "日本"    
##  [97] "日前"     "日圓"     "月份"     "月營"     "比重"     "毛利率"  
## [103] "水準"     "主要"     "代工"     "以上"     "以及"     "以來"    
## [109] "出現"     "出貨"     "出貨量"   "加上"     "加工"     "包括"    
## [115] "半導體"   "占營收"   "去年"     "去年同期" "另外"     "可以"    
## [121] "可能"     "可望"     "台達電"   "台積電"   "台灣"     "四成"    
## [127] "外銷"     "尼龍"     "左右"     "市占率"   "市場"     "布局"    
## [133] "平台"     "平均"     "平板"     "未來"     "本季"     "本業"    
## [139] "正式"     "永光"     "永記"     "生技"     "生產"     "用於"    
## [145] "由於"     "目前"     "目標"     "任何"     "企業"     "先前"    
## [151] "光電"     "全年"     "全球"     "再度"     "合作"     "合併"    
## [157] "合資"     "同步"     "同時"     "同期"     "向上"     "回升"    
## [163] "回溫"     "因此"     "因素"     "地區"     "年底"     "年度"    
## [169] "年減"     "年增"     "成分股"   "成本"     "成立"     "成長"    
## [175] "成為"     "收入"     "收益"     "收盤"     "有利"     "有望"    
## [181] "此外"     "自有"     "至於"     "行動"     "行情"     "估計"    
## [187] "伺服器"   "低於"     "佔營收"   "利用率"   "利益"     "利率"    
## [193] "即將"     "呈現"     "完成"     "技術"     "投入"     "投產"    
## [199] "投資"     "投資人"   "改善"     "杏昌"     "材料"     "每股"    
## [205] "決定"     "汽車"     "系統"     "肝癌"     "走強"     "走勢"    
## [211] "事業"     "亞洲"     "併購"     "來自"     "來到"     "來看"    
## [217] "供應"     "供應商"   "供應鏈"   "兩年"     "其中"     "其他"    
## [223] "取得"     "受到"     "受惠"     "奈米"     "季增"     "怡潔"    
## [229] "拉貨"     "旺季"     "明年"     "明顯"     "服務"     "東京"    
## [235] "油脂"     "油價"     "治療"     "法人"     "狀況"     "空間"    
## [241] "股利"     "股價"     "表示"     "表現"     "近年"     "近期"    
## [247] "金風"     "金融"     "長期"     "長榮"     "長線"     "長興"    
## [253] "亮眼"     "信驊"     "前三季"   "南僑"     "品牌"     "客戶"    
## [259] "宣布"     "封裝"     "建議"     "持平"     "持股"     "持續"    
## [265] "指出"     "指數"     "挑戰"     "政策"     "春節"     "染料"    
## [271] "為主"     "盈餘"     "相當"     "相較"     "相對"     "相關"    
## [277] "看好"     "研究"     "突破"     "紀錄"     "美元"     "美國"    
## [283] "英特爾"   "訂單"     "計畫"     "負極"     "降低"     "面板"    
## [289] "風力"     "風電"     "首季"     "原先"     "原料"     "展望"    
## [295] "庫存"     "挹注"     "效益"     "效應"     "根據"     "海外"    
## [301] "消息"     "能力"     "航空"     "航線"     "衰退"     "記者"    
## [307] "財報"     "貢獻"     "除了"     "高於"     "高峰"     "高通"    
## [313] "高階"     "高爾夫球" "高檔"     "動能"     "問題"     "啟動"    
## [319] "國內"     "國際"     "將上"     "將在"     "將有"     "將較"    
## [325] "帶來"     "帶動"     "強化"     "強勁"     "情況"     "採用"    
## [331] "接單"     "推升"     "推出"     "推估"     "淡季"     "淨利"    
## [337] "現金"     "產品"     "產品組合" "產品線"   "產能"     "產業"    
## [343] "產線"     "第一季"   "第二季"   "第三季"   "第四季"   "累計"    
## [349] "終場"     "船舶"     "處分"     "處理器"   "規模"     "設計"    
## [355] "設備"     "透過"     "逐步"     "通訊"     "通過"     "連續"    
## [361] "部分"     "部門"     "陸續"     "創下"     "創新"     "單月"    
## [367] "單季"     "報告"     "報價"     "報導"     "幅度"     "復甦"    
## [373] "提升"     "提供"     "提前"     "提高"     "普爾"     "景氣"    
## [379] "晶片"     "最大"     "最新"     "發表"     "發展"     "發電"    
## [385] "發酵"     "稅前"     "稅後"     "結構"     "華航"     "貶值"    
## [391] "費城"     "超過"     "進一步"   "進入"     "進行"     "量產"    
## [397] "開出"     "開始"     "開發"     "集團"     "雲端"     "順利"    
## [403] "傳出"     "傳統"     "匯兌"     "損失"     "新高"     "新聞"    
## [409] "新增"     "新廠"     "新興"     "新藥"     "業外"     "業者"    
## [415] "業務"     "業績"     "煙囪"     "照明"     "經濟"     "萬元"    
## [421] "萬噸"     "葉片"     "裝置"     "資金"     "資料"     "農曆"    
## [427] "運價"     "過去"     "道瓊"     "達方"     "達到"     "零組件"  
## [433] "電子"     "電池"     "電動車"   "電源"     "預估"     "預計"    
## [439] "預期"     "對於"     "旗下"     "暢旺"     "漲幅"     "管理"    
## [445] "維持"     "網路"     "製造"     "製程"     "認列"     "認為"    
## [451] "認證"     "遞延"     "銀行"     "需求"     "領域"     "價值"    
## [457] "價格"     "億元"     "億美元"   "增加"     "增長"     "廠商"    
## [463] "影響"     "數據"     "樂觀"     "標準"     "模組"     "歐洲"    
## [469] "歐美"     "調漲"     "調整"     "鄭盈芷"   "銷售"     "導致"    
## [475] "擁有"     "整合"     "整體"     "樹脂"     "機會"     "歷史"    
## [481] "激勵"     "積極"     "興農"     "融資"     "鋼價"     "隨著"    
## [487] "龍頭"     "優於"     "壓力"     "應用"     "營收"     "營收將"  
## [493] "營業"     "營運"     "獲利"     "獲得"     "環保"     "聯合"    
## [499] "聯德"     "臨床"     "虧損"     "趨勢"     "還有"     "還是"    
## [505] "鍵盤"     "雖然"     "擴大"     "轉投資"   "轉型"     "轉盈"    
## [511] "醫療"     "穩定"     "穩健"     "繳出"     "藥證"     "證券"    
## [517] "類股"     "寶利徠"   "繼續"     "蘋果"     "觸控"     "鐵礦石"  
## [523] "顯示"     "觀察"
# Terms whose correlation with "上漲" across documents is at least 0.5
findAssocs(dtm, "上漲", 0.5)
## $上漲
##      收盤      普爾      道瓊      那斯      終場      達克      指數 
##      0.75      0.75      0.75      0.73      0.73      0.73      0.72 
##      費城      標準    郭妍希    成分股       500      之冠      聞訊 
##      0.71      0.71      0.64      0.63      0.62      0.61      0.59 
##  appleinc      2330      跳漲      之賜      報告      恐慌      聯準 
##      0.58      0.57      0.57      0.56      0.56      0.55      0.55 
##      以來    彭博社        co       fed    分析師       adr      cboe 
##      0.54      0.54      0.53      0.53      0.53      0.52      0.52 
##      用來    芝加哥      研究    選擇權    barron      美元      聯邦 
##      0.52      0.52      0.52      0.52      0.51      0.51      0.51 
## intelcorp  三星電子 
##      0.50      0.50
# News items whose content mentions "庫藏股"; look at the 10th match
news.content <- news[grepl("庫藏股", news$Content), ]
news.content[10, ]$StockNo
## [1] 8039
news.content[10, ]$Date
## [[1]]
## [1] "2014-11-03"
# Price history for that stock
stockdf <- getSymbols("8039.TW", auto.assign = FALSE)
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 101091 != reported length 200
# Forward ("after") and backward ("before") returns; the horizons count
# rows of the series, i.e. 1, 7 and 30 trading days.
stockdf$onedayafter <- getReturn(stockdf, -1)
stockdf$sevendaysafter <- getReturn(stockdf, -7)
stockdf$onemonthafter <- getReturn(stockdf, -30)
stockdf$onedaybefore <- getReturn(stockdf, 1)
stockdf$sevendaysbefore <- getReturn(stockdf, 7)
stockdf$onemonthbefore <- getReturn(stockdf, 30)

# Row for the news date
stockdf["2014-11-03"]
##            X8039.TW.Open X8039.TW.High X8039.TW.Low X8039.TW.Close
## 2014-11-03         44.95         45.35         44.5           44.6
##            X8039.TW.Volume X8039.TW.Adjusted onedayafter sevendaysafter
## 2014-11-03         4381000           40.4059 0.004484305    -0.03923767
##            onemonthafter onedaybefore sevendaysbefore onemonthbefore
## 2014-11-03   -0.02466368 -0.008888889      0.06190476     -0.1254902
# Closing prices (column 4) from 2014-10-01 through 2014-11-30
stockdf["2014-10-01::2014-11-30", 4]
##            X8039.TW.Close
## 2014-10-01          48.20
## 2014-10-02          48.90
## 2014-10-03          49.55
## 2014-10-06          47.85
## 2014-10-07          47.50
## 2014-10-08          47.25
## 2014-10-09          44.00
## 2014-10-10          44.00
## 2014-10-13          40.95
## 2014-10-14          40.40
## 2014-10-15          41.20
## 2014-10-16          43.50
## 2014-10-17          42.30
## 2014-10-20          42.60
## 2014-10-21          42.10
## 2014-10-22          42.20
## 2014-10-23          42.00
## 2014-10-24          42.25
## 2014-10-27          40.35
## 2014-10-28          42.00
## 2014-10-29          42.80
## 2014-10-30          42.10
## 2014-10-31          45.00
## 2014-11-03          44.60
## 2014-11-04          44.80
## 2014-11-05          44.10
## 2014-11-06          43.00
## 2014-11-07          43.70
## 2014-11-10          43.25
## 2014-11-11          43.20
## 2014-11-12          42.85
## 2014-11-13          43.00
## 2014-11-14          43.05
## 2014-11-17          42.30
## 2014-11-18          41.15
## 2014-11-19          41.25
## 2014-11-20          42.90
## 2014-11-21          42.60
## 2014-11-24          42.05
## 2014-11-25          42.65
## 2014-11-26          43.20
## 2014-11-27          43.25
## 2014-11-28          42.75
# Chart the same two-month window around the news date
chartSeries(stockdf["2014-10-01::2014-11-30"])

# News items whose content mentions "恢復交易"; inspect the 4th match
news.content <- news[grepl("恢復交易", news$Content), ]
news.content[4, ]$Title
## [1] "新焦點TDR 4/20起恢復交易"
news.content[4, ]$StockNo
## [1] 9106
# Date of the same item (row 4). This call previously indexed row 10, unlike
# the Title/StockNo lookups above, and its recorded output was a NULL Date;
# the output has not been re-captured for the corrected index.
news.content[4, ]$Date
# Returns around a news date for one stock.
#
# Args:
#   stockNo: stock number; ".TW" is appended to build the symbol passed to
#            getSymbols().
#   Date:    the news date, either a date string or a one-element list
#            holding one (as produced by the `Date` list column of `news`).
#            Only the first element is used.
#
# Returns:
#   The row(s) of the downloaded price series matching the date, extended
#   with forward ("after") and backward ("before") returns over 1, 7 and 30
#   rows of the series. Returns NULL when no usable date is supplied.
getPerformance <- function(stockNo, Date) {
  event_date <- if (length(Date) > 0L) Date[[1]] else NULL

  # Without a date there is nothing to look up, so skip the download.
  if (length(event_date) == 0L || anyNA(event_date)) {
    return(NULL)
  }

  stockdf <- getSymbols(paste0(stockNo, ".TW"), auto.assign = FALSE)

  # Negative horizons look forward, positive ones look back (see getReturn).
  stockdf$onedayafter     <- getReturn(stockdf, -1)
  stockdf$sevendaysafter  <- getReturn(stockdf, -7)
  stockdf$onemonthafter   <- getReturn(stockdf, -30)
  stockdf$onedaybefore    <- getReturn(stockdf, 1)
  stockdf$sevendaysbefore <- getReturn(stockdf, 7)
  stockdf$onemonthbefore  <- getReturn(stockdf, 30)

  stockdf[event_date]
}


# Collect the performance row of every matching news item into `ary`.
# seq_len() keeps this a no-op when there are no rows, and the per-item
# results are bound once at the end instead of growing `ary` on every pass.
# A failing item (e.g. a symbol that cannot be downloaded) is skipped, but
# the failure is reported instead of being discarded silently.
ary <- do.call(
  rbind,
  Filter(
    Negate(is.null),
    lapply(seq_len(nrow(news.content)), function(i) {
      tryCatch(
        getPerformance(news.content[i, ]$StockNo, news.content[i, ]$Date),
        error = function(cond) {
          warning(
            "getPerformance failed for StockNo ",
            news.content[i, ]$StockNo, ": ", conditionMessage(cond),
            call. = FALSE
          )
          NULL
        }
      )
    })
  )
)
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 137792 != reported length 200
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 67016 != reported length 200

## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 67016 != reported length 200
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 73752 != reported length 200
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 76240 != reported length 200

## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 76240 != reported length 200
# Price history for 9106.TW, the stock of the news item inspected above
tw9106 <- getSymbols("9106.TW", auto.assign = FALSE)
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 73752 != reported length 200
# Chart April and May 2015
chart_Series(tw9106["2015-04-01::2015-06-01"])