library(quantmod)
## Warning: package 'quantmod' was built under R version 3.2.5
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.2.5
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.2.5
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.2.5
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Fetch the price history for symbol 2330.TW and bind it to `stockdf`
# (auto.assign = FALSE makes getSymbols() return the data to the caller).
stockdf <- getSymbols("2330.TW", auto.assign = FALSE)
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 128346 != reported length 200
# lag: preview column 4 next to the same column shifted by one row, so each
# row shows its own value beside the value from the row before it.
head(
  cbind(
    stockdf[, 4],
    lag(stockdf[, 4], 1)
  )
)
## X2330.TW.Close X2330.TW.Close.1
## 2007-01-02 67.3632 NA
## 2007-01-03 67.6617 67.3632
## 2007-01-04 67.3632 67.6617
## 2007-01-05 66.5673 67.3632
## 2007-01-08 65.4727 66.5673
## 2007-01-09 65.6717 65.4727
# daily return: change in column 4 relative to the previous row's value
daily_return <- (stockdf[, 4] - lag(stockdf[, 4], 1)) /
  lag(stockdf[, 4], 1)
#' Simple return of one price column over a shifted horizon.
#'
#' The shift is delegated to `lag()`, so `days` counts rows of the series, not
#' calendar days. In this script the series comes from getSymbols(), where the
#' recorded output shows a positive shift pairing each row with an earlier row
#' and a negative shift pairing it with a later row.
#'
#' @param stockdf Price series that supports `[, column]`, `lag()` and
#'   element-wise arithmetic.
#' @param days Single number passed to `lag()`.
#'   If `days > 0` the result is (price - shifted) / shifted;
#'   otherwise it is (shifted - price) / price, which is all zeros for 0.
#' @param column Column of `stockdf` to use. Defaults to 4, the column the
#'   original implementation hard-coded (the close column in this script).
#' @return The return series, of the same kind the column arithmetic yields.
getReturn <- function(stockdf, days = 1, column = 4) {
  # `if` needs exactly one non-missing number; fail early with a clear error.
  stopifnot(is.numeric(days), length(days) == 1L, !is.na(days))

  price <- stockdf[, column]
  shifted <- lag(price, days) # computed once, reused in both terms

  if (days > 0) {
    (price - shifted) / shifted
  } else {
    (shifted - price) / price
  }
}
# Forward-looking return with a shift of -2 and backward-looking return with a
# shift of 7 (shifts count rows of the series).
daily_return <- getReturn(stockdf, days = -2)
weekly_return <- getReturn(stockdf, days = 7)
# NOTE(review): the preview pairs a one-row lead of column 4 with the two-row
# forward return stored in `daily_return` -- confirm the mismatch is intended.
head(
  cbind(
    stockdf[, 4],
    lag(stockdf[, 4], -1),
    daily_return
  )
)
## X2330.TW.Close X2330.TW.Close.1 X2330.TW.Close.2
## 2007-01-02 67.3632 67.6617 0.000000000
## 2007-01-03 67.6617 67.3632 -0.016174586
## 2007-01-04 67.3632 66.5673 -0.028064284
## 2007-01-05 66.5673 65.4727 -0.013454053
## 2007-01-08 65.4727 65.6717 -0.024317005
## 2007-01-09 65.6717 63.8806 -0.003030225
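# getReturn(stockdf, 7):  backward-looking return, current row vs. the row 7 observations earlier
# getReturn(stockdf, -2): forward-looking return, the row 2 observations later vs. the current row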
library(jiebaR)
## Warning: package 'jiebaR' was built under R version 3.2.5
## Loading required package: jiebaRD
## Warning: package 'jiebaRD' was built under R version 3.2.5
# Segmentation worker shared by every segment() call in this script
mixseg <- worker()
news <- read.csv('moneydj201415_big5.csv', header = TRUE)
# Type Conversion: force the text columns to plain character vectors
news$Content <- as.character(news$Content)
news$DateTime <- as.character(news$DateTime)
news$Title <- as.character(news$Title)
# Add Date: keep the part of DateTime before the first space.
# vapply() yields an ordinary character column; the previous lapply() call
# stored a list column, which is why single dates printed as `[[1]]`.
news$Date <- vapply(
  news$DateTime,
  function(e) strsplit(e, ' ')[[1]][1],
  character(1),
  USE.NAMES = FALSE
)
# Bodies of the articles that mention '激勵', with duplicates removed
news.content <- news[grepl('激勵', news$Content), 'Content']
news.content <- unique(news.content)
# use jiebaR to segment words. The `mixseg` worker created above is reused:
# the second worker() call here was bound to the misspelled name `mixeg` and
# never used, and jiebaR is already attached earlier in the script.
content.all <- lapply(
  news.content,
  function(e) segment(code = e, jiebar = mixseg)
)
# Build Document Term Matrix
library(tm)
## Loading required package: NLP
# Tokenizer backed by the global `mixseg` worker: segments the first element
# of `d` and returns the result flattened with unlist().
jieba_tokenizer <- function(d) {
  first_element <- d[[1]]
  segmented <- segment(first_element, mixseg)
  unlist(segmented)
}
# Whitespace tokenizer: coerces the first element of `x` to character and
# splits it on runs of whitespace, returning a flat character vector.
space_tokenizer <- function(x) {
  text <- as.character(x[[1]])
  pieces <- strsplit(text, '[[:space:]]+')
  unlist(pieces)
}
# Wrap the segmented articles in a corpus and run the jieba tokenizer over it,
# flattening the result by one level.
doc <- VCorpus(VectorSource(content.all))
doc <- unlist(tm_map(doc, jieba_tokenizer), recursive = FALSE)
# Re-join each document's tokens with single spaces so that space_tokenizer
# can split them again while the matrix is built.
doc <- lapply(doc, function(d) paste(d, collapse = ' '))
# Keep terms of length 2 or more and tokenize on whitespace.
control.list <- list(
  wordLengths = c(2, Inf),
  tokenize = space_tokenizer
)
dtm <- DocumentTermMatrix(
  Corpus(VectorSource(doc)),
  control = control.list
)
# Terms with a frequency of at least 20
findFreqTerms(dtm, 20)
## [1] "10" "11" "12" "13" "14" "15"
## [7] "16" "17" "18" "19" "20" "2012"
## [13] "2013" "2014" "2015" "21" "22" "24"
## [19] "25" "26" "27" "28" "30" "35"
## [25] "40" "50" "500" "eps" "ic" "iphone"
## [31] "led" "lineups" "moneydj" "mosfet" "nb" "on"
## [37] "pc" "q1" "q2" "q3" "q4" "ups"
## [43] "一年" "一度" "一個" "二成" "入帳" "力道"
## [49] "上半年" "上市" "上揚" "上游" "上漲" "上緯"
## [55] "下半年" "下跌" "下滑" "也將" "土地" "大田"
## [61] "大陸" "大幅" "大增" "大廠" "大關" "子公司"
## [67] "小幅" "工作" "工業" "已經" "不如" "不過"
## [73] "中心" "中信金" "中國" "中碳" "中鋼" "五成"
## [79] "今年" "公司" "公布" "公噸" "分別" "分析"
## [85] "分析師" "切入" "化工" "升至" "升溫" "反彈"
## [91] "天數" "太陽能" "巴西" "手機" "方面" "日本"
## [97] "日前" "日圓" "月份" "月營" "比重" "毛利率"
## [103] "水準" "主要" "代工" "以上" "以及" "以來"
## [109] "出現" "出貨" "出貨量" "加上" "加工" "包括"
## [115] "半導體" "占營收" "去年" "去年同期" "另外" "可以"
## [121] "可能" "可望" "台達電" "台積電" "台灣" "四成"
## [127] "外銷" "尼龍" "左右" "市占率" "市場" "布局"
## [133] "平台" "平均" "平板" "未來" "本季" "本業"
## [139] "正式" "永光" "永記" "生技" "生產" "用於"
## [145] "由於" "目前" "目標" "任何" "企業" "先前"
## [151] "光電" "全年" "全球" "再度" "合作" "合併"
## [157] "合資" "同步" "同時" "同期" "向上" "回升"
## [163] "回溫" "因此" "因素" "地區" "年底" "年度"
## [169] "年減" "年增" "成分股" "成本" "成立" "成長"
## [175] "成為" "收入" "收益" "收盤" "有利" "有望"
## [181] "此外" "自有" "至於" "行動" "行情" "估計"
## [187] "伺服器" "低於" "佔營收" "利用率" "利益" "利率"
## [193] "即將" "呈現" "完成" "技術" "投入" "投產"
## [199] "投資" "投資人" "改善" "杏昌" "材料" "每股"
## [205] "決定" "汽車" "系統" "肝癌" "走強" "走勢"
## [211] "事業" "亞洲" "併購" "來自" "來到" "來看"
## [217] "供應" "供應商" "供應鏈" "兩年" "其中" "其他"
## [223] "取得" "受到" "受惠" "奈米" "季增" "怡潔"
## [229] "拉貨" "旺季" "明年" "明顯" "服務" "東京"
## [235] "油脂" "油價" "治療" "法人" "狀況" "空間"
## [241] "股利" "股價" "表示" "表現" "近年" "近期"
## [247] "金風" "金融" "長期" "長榮" "長線" "長興"
## [253] "亮眼" "信驊" "前三季" "南僑" "品牌" "客戶"
## [259] "宣布" "封裝" "建議" "持平" "持股" "持續"
## [265] "指出" "指數" "挑戰" "政策" "春節" "染料"
## [271] "為主" "盈餘" "相當" "相較" "相對" "相關"
## [277] "看好" "研究" "突破" "紀錄" "美元" "美國"
## [283] "英特爾" "訂單" "計畫" "負極" "降低" "面板"
## [289] "風力" "風電" "首季" "原先" "原料" "展望"
## [295] "庫存" "挹注" "效益" "效應" "根據" "海外"
## [301] "消息" "能力" "航空" "航線" "衰退" "記者"
## [307] "財報" "貢獻" "除了" "高於" "高峰" "高通"
## [313] "高階" "高爾夫球" "高檔" "動能" "問題" "啟動"
## [319] "國內" "國際" "將上" "將在" "將有" "將較"
## [325] "帶來" "帶動" "強化" "強勁" "情況" "採用"
## [331] "接單" "推升" "推出" "推估" "淡季" "淨利"
## [337] "現金" "產品" "產品組合" "產品線" "產能" "產業"
## [343] "產線" "第一季" "第二季" "第三季" "第四季" "累計"
## [349] "終場" "船舶" "處分" "處理器" "規模" "設計"
## [355] "設備" "透過" "逐步" "通訊" "通過" "連續"
## [361] "部分" "部門" "陸續" "創下" "創新" "單月"
## [367] "單季" "報告" "報價" "報導" "幅度" "復甦"
## [373] "提升" "提供" "提前" "提高" "普爾" "景氣"
## [379] "晶片" "最大" "最新" "發表" "發展" "發電"
## [385] "發酵" "稅前" "稅後" "結構" "華航" "貶值"
## [391] "費城" "超過" "進一步" "進入" "進行" "量產"
## [397] "開出" "開始" "開發" "集團" "雲端" "順利"
## [403] "傳出" "傳統" "匯兌" "損失" "新高" "新聞"
## [409] "新增" "新廠" "新興" "新藥" "業外" "業者"
## [415] "業務" "業績" "煙囪" "照明" "經濟" "萬元"
## [421] "萬噸" "葉片" "裝置" "資金" "資料" "農曆"
## [427] "運價" "過去" "道瓊" "達方" "達到" "零組件"
## [433] "電子" "電池" "電動車" "電源" "預估" "預計"
## [439] "預期" "對於" "旗下" "暢旺" "漲幅" "管理"
## [445] "維持" "網路" "製造" "製程" "認列" "認為"
## [451] "認證" "遞延" "銀行" "需求" "領域" "價值"
## [457] "價格" "億元" "億美元" "增加" "增長" "廠商"
## [463] "影響" "數據" "樂觀" "標準" "模組" "歐洲"
## [469] "歐美" "調漲" "調整" "鄭盈芷" "銷售" "導致"
## [475] "擁有" "整合" "整體" "樹脂" "機會" "歷史"
## [481] "激勵" "積極" "興農" "融資" "鋼價" "隨著"
## [487] "龍頭" "優於" "壓力" "應用" "營收" "營收將"
## [493] "營業" "營運" "獲利" "獲得" "環保" "聯合"
## [499] "聯德" "臨床" "虧損" "趨勢" "還有" "還是"
## [505] "鍵盤" "雖然" "擴大" "轉投資" "轉型" "轉盈"
## [511] "醫療" "穩定" "穩健" "繳出" "藥證" "證券"
## [517] "類股" "寶利徠" "繼續" "蘋果" "觸控" "鐵礦石"
## [523] "顯示" "觀察"
# Terms whose correlation with '上漲' in the matrix is at least 0.5
findAssocs(dtm, '上漲', corlimit = 0.5)
## $上漲
## 收盤 普爾 道瓊 那斯 終場 達克 指數
## 0.75 0.75 0.75 0.73 0.73 0.73 0.72
## 費城 標準 郭妍希 成分股 500 之冠 聞訊
## 0.71 0.71 0.64 0.63 0.62 0.61 0.59
## appleinc 2330 跳漲 之賜 報告 恐慌 聯準
## 0.58 0.57 0.57 0.56 0.56 0.55 0.55
## 以來 彭博社 co fed 分析師 adr cboe
## 0.54 0.54 0.53 0.53 0.53 0.52 0.52
## 用來 芝加哥 研究 選擇權 barron 美元 聯邦
## 0.52 0.52 0.52 0.52 0.51 0.51 0.51
## intelcorp 三星電子
## 0.50 0.50
# Point the working subset at articles mentioning '庫藏股' (all columns kept)
news.content <- news[grepl('庫藏股', news$Content), ]
# Stock number and date of the 10th matching article
news.content[10, ]$StockNo
## [1] 8039
news.content[10, ]$Date
## [[1]]
## [1] "2014-11-03"
# Fetch that stock's prices, then attach forward returns (negative shifts)
# and backward returns (positive shifts) as extra columns.
stockdf <- getSymbols("8039.TW", auto.assign = FALSE)
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 101091 != reported length 200
stockdf$onedayafter <- getReturn(stockdf, days = -1)
stockdf$sevendaysafter <- getReturn(stockdf, days = -7)
stockdf$onemonthafter <- getReturn(stockdf, days = -30)
stockdf$onedaybefore <- getReturn(stockdf, days = 1)
stockdf$sevendaysbefore <- getReturn(stockdf, days = 7)
stockdf$onemonthbefore <- getReturn(stockdf, days = 30)
# Row for the article's date
stockdf['2014-11-03']
## X8039.TW.Open X8039.TW.High X8039.TW.Low X8039.TW.Close
## 2014-11-03 44.95 45.35 44.5 44.6
## X8039.TW.Volume X8039.TW.Adjusted onedayafter sevendaysafter
## 2014-11-03 4381000 40.4059 0.004484305 -0.03923767
## onemonthafter onedaybefore sevendaysbefore onemonthbefore
## 2014-11-03 -0.02466368 -0.008888889 0.06190476 -0.1254902
# Column 4 for the window 2014-10-01 through 2014-11-30
stockdf["2014-10-01::2014-11-30", 4]
## X8039.TW.Close
## 2014-10-01 48.20
## 2014-10-02 48.90
## 2014-10-03 49.55
## 2014-10-06 47.85
## 2014-10-07 47.50
## 2014-10-08 47.25
## 2014-10-09 44.00
## 2014-10-10 44.00
## 2014-10-13 40.95
## 2014-10-14 40.40
## 2014-10-15 41.20
## 2014-10-16 43.50
## 2014-10-17 42.30
## 2014-10-20 42.60
## 2014-10-21 42.10
## 2014-10-22 42.20
## 2014-10-23 42.00
## 2014-10-24 42.25
## 2014-10-27 40.35
## 2014-10-28 42.00
## 2014-10-29 42.80
## 2014-10-30 42.10
## 2014-10-31 45.00
## 2014-11-03 44.60
## 2014-11-04 44.80
## 2014-11-05 44.10
## 2014-11-06 43.00
## 2014-11-07 43.70
## 2014-11-10 43.25
## 2014-11-11 43.20
## 2014-11-12 42.85
## 2014-11-13 43.00
## 2014-11-14 43.05
## 2014-11-17 42.30
## 2014-11-18 41.15
## 2014-11-19 41.25
## 2014-11-20 42.90
## 2014-11-21 42.60
## 2014-11-24 42.05
## 2014-11-25 42.65
## 2014-11-26 43.20
## 2014-11-27 43.25
## 2014-11-28 42.75
# Chart the window 2014-10-01 through 2014-11-30
chartSeries(stockdf["2014-10-01::2014-11-30"])

# Point the working subset at articles mentioning '恢復交易' (all columns kept)
news.content <- news[grepl('恢復交易', news$Content), ]
# Title, stock number and date of the 4th matching article
news.content[4, ]$Title
## [1] "新焦點TDR 4/20起恢復交易"
news.content[4, ]$StockNo
## [1] 9106
# Fixed: this line used to read row 10, a leftover from the earlier example,
# and its recorded output was NULL. It now reads row 4 like the two lookups
# above; re-run to capture the printed date.
news.content[4, ]$Date
# Download the prices for symbol "<stockNo>.TW", attach forward and backward
# returns computed by getReturn(), and return the row(s) selected by a date.
#   stockNo: stock number; ".TW" is appended to build the symbol
#   Date:    list or vector whose first element is used to subset the series
getPerformance <- function(stockNo, Date) {
  symbol <- paste0(stockNo, ".TW")
  quotes <- getSymbols(symbol, auto.assign = FALSE)

  # Negative shifts: "after" columns
  quotes$onedayafter <- getReturn(quotes, -1)
  quotes$sevendaysafter <- getReturn(quotes, -7)
  quotes$onemonthafter <- getReturn(quotes, -30)

  # Positive shifts: "before" columns
  quotes$onedaybefore <- getReturn(quotes, 1)
  quotes$sevendaysbefore <- getReturn(quotes, 7)
  quotes$onemonthbefore <- getReturn(quotes, 30)

  quotes[Date[[1]]]
}
# Collect the performance row of every article in `news.content` into `ary`.
# Lookups that fail are skipped, as before, but each failure is now reported
# instead of being dropped silently.
ary <- NULL
# seq_len() gives an empty loop for a zero-row subset; seq(1, 0) would have
# iterated over 1 and 0.
for (i in seq_len(nrow(news.content))) {
  df <- tryCatch(
    getPerformance(news.content[i, ]$StockNo, news.content[i, ]$Date),
    error = function(cond) {
      message(
        "getPerformance failed for row ", i,
        " (StockNo ", news.content[i, ]$StockNo, "): ",
        conditionMessage(cond)
      )
      NULL # rbind() below ignores NULL, so the row is skipped
    }
  )
  ary <- rbind(ary, df)
}
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 137792 != reported length 200
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 67016 != reported length 200
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 67016 != reported length 200
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 73752 != reported length 200
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 76240 != reported length 200
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 76240 != reported length 200
# Price history for symbol 9106.TW
tw9106 <- getSymbols("9106.TW", auto.assign = FALSE)
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 73752 != reported length 200
# Chart the window 2015-04-01 through 2015-06-01
chart_Series(tw9106["2015-04-01::2015-06-01"])
