johnson — Apr 1, 2014, 1:58 PM
# Simple text-mining example (ECFA news article): segment Chinese text into words and count term frequencies
# install.packages("tm")
# install.packages("tmcn")
# install.packages("Rwordseg", repos = "http://R-Forge.R-project.org", type = "source")
library(tm)
library(tmcn)
# tmcn Version: 0.1-2
library(Rwordseg)
Loading required package: rJava
# Version: 0.2-1
# Segment a Traditional Chinese news article into words with segmentCN()
# (presumably provided by Rwordseg, loaded above -- confirm). The article is
# passed as a single multi-line string literal; the result is stored in
# `input` for the term-frequency analysis that follows.
input <- segmentCN('公民團體與學生代表今天召開330凱道遊行行前記者會,強調本次遊行秉持「和平、非暴力」原則,將從立法院「延伸」到凱道,地球公民基金會執行長李根政否認佔領總統府的可能,也強調一定在七點活動結束後,引領聲援群眾回到「精神堡壘」的立法院,呼籲警方不要趁機挑釁。而總統馬英九原定29日一早在總統府內對外的說明,記者會延到下午三點召開。
明日遊行將於下午一點在凱道集合,先靜坐兩小時,接著由各界代表、產業代表、罷課師生、藝文團體等在凱道舞台接力聲援。同時,中山南路也將規劃為「公民憲政大道」,邀請教育、人權、性別、勞工、居住權等NGO擺攤。活動預估超過十萬人,將派出300名以上糾察隊維持秩序。學生代表陳為廷說,光是台中就有80輛遊覽車要北上聲援,全球10幾個國際城市也將在同一時間串聯,隔海聲援。
針對近期馬政府回應服貿可以有監督機制,律師賴中強表示,從去年六月服貿公佈至今,他們一直堅持「監督法制化」,應由立法院立法監督,而不是行政院長江宜樺所宣稱的四階段行政命令。民間版兩岸締結條例草案也已經通過提案門檻。陳為廷則指責馬政府雖承諾可立法、可審查,但立法卻又不適用於服貿。馬政府依然迴避訴求,佔領立院至今12天,抗議民眾已經「退無可退」,一定要上街創造歷史。
')
# Print the segmented tokens to inspect the segmentation result.
input
[1] "公民" "團體" "與" "學生" "代表" "今天" "召開"
[8] "330" "凱" "道" "遊" "行" "行前" "記者會"
[15] "強調" "本次" "遊" "行" "秉" "持" "和平"
[22] "非暴力" "原則" "將" "從" "立" "法院" "延伸"
[29] "到" "凱" "道" "地球" "公民" "基金會" "執行"
[36] "長" "李" "根" "政" "否認" "佔領" "總統府"
[43] "的" "可能" "也" "強調" "一" "定" "在"
[50] "七點" "活動" "結束" "後" "引" "領" "聲援"
[57] "群眾" "回到" "精神" "堡壘" "的" "立" "法院"
[64] "呼" "籲" "警方" "不要" "趁機" "挑釁" "而"
[71] "總統" "馬" "英" "九" "原定" "29日" "一"
[78] "早" "在" "總統府" "內" "對" "外" "的"
[85] "說明" "記者會" "延" "到" "下午" "三點" "召開"
[92] "明日" "遊" "行將" "於" "下午" "一點" "在"
[99] "凱" "道" "集合" "先" "靜坐" "兩" "小時"
[106] "接" "著" "由" "各界" "代表" "產業" "代表"
[113] "罷課" "師生" "藝" "文" "團體" "等" "在"
[120] "凱" "道" "舞台" "接力" "聲援" "同時" "中"
[127] "山" "南" "路" "也" "將" "規劃" "為"
[134] "公民" "憲政" "大道" "邀請" "教育" "人權" "性別"
[141] "勞工" "居住" "權" "等" "NGO" "擺攤" "活動"
[148] "預估" "超過" "十萬" "人" "將" "派出" "300名"
[155] "以上" "糾察隊" "維持" "秩序" "學生" "代表" "陳"
[162] "為" "廷" "說" "光是" "台" "中" "就"
[169] "有" "80輛" "遊" "覽" "車" "要" "北上"
[176] "聲援" "全球" "10幾個" "國際" "城市" "也" "將"
[183] "在" "同" "一時間" "串聯" "隔" "海" "聲援"
[190] "針對" "近期" "馬" "政府" "回應" "服" "貿"
[197] "可以" "有" "監督" "機制" "律師" "賴" "中"
[204] "強" "表示" "從" "去年" "六月" "服" "貿"
[211] "公" "佈" "至今" "他們" "一直" "堅持" "監督"
[218] "法制化" "應" "由" "立" "法院" "立法" "監督"
[225] "而" "不" "是" "行政院" "長江" "宜" "樺"
[232] "所" "宣稱" "的" "四" "階段" "行政" "命令"
[239] "民間" "版" "兩岸" "締結" "條例" "草案" "也"
[246] "已經" "通過" "提案" "門檻" "陳" "為" "廷"
[253] "則" "指責" "馬" "政府" "雖" "承諾" "可"
[260] "立法" "可" "審查" "但" "立法" "卻" "又"
[267] "不" "適用" "於" "服" "貿" "馬" "政府"
[274] "依然" "迴" "避" "訴" "求" "佔領" "立"
[281] "院" "至今" "12天" "抗議" "民眾" "已經" "退"
[288] "無可" "退" "一定" "要" "上街" "創造" "歷史"
# Build a corpus from the segmented tokens; VectorSource() makes each
# element of `input` its own document.
d.corpus <- Corpus(VectorSource(input))

# Term-document matrix restricted to terms whose length is at least 2
# (no upper bound), which drops single-character tokens.
tdm <- TermDocumentMatrix(
  d.corpus,
  control = list(wordLengths = c(2, Inf))
)

# Sum each term's row to get its total count, most frequent first.
m1 <- as.matrix(tdm)
v <- sort(rowSums(m1), decreasing = TRUE)

# Frequency table with one row per term, then show the ten most frequent.
d <- data.frame(word = names(v), freq = v)
head(d, n = 10)
word freq
代表 代表 4
聲援 聲援 4
法院 法院 3
公民 公民 3
監督 監督 3
立法 立法 3
政府 政府 3
活動 活動 2
記者會 記者會 2
強調 強調 2