example.R

johnson — Apr 1, 2014, 1:58 PM

# Simple example for Text mining X ECFA

# install.packages("tm")
# install.packages("tmcn")
# install.packages("Rwordseg", repos = "http://R-Forge.R-project.org", type = "source")

library(tm)
library(tmcn)
# tmcn Version: 0.1-2
library(Rwordseg)
Loading required package: rJava
# Version: 0.2-1

# Segment a three-paragraph Traditional Chinese news excerpt into word tokens
# with segmentCN() (presumably provided by Rwordseg -- confirm against the
# installed package versions). The text is passed as one multi-line string
# literal; the result is stored in `input`.
input <- segmentCN('公民團體與學生代表今天召開330凱道遊行行前記者會,強調本次遊行秉持「和平、非暴力」原則,將從立法院「延伸」到凱道,地球公民基金會執行長李根政否認佔領總統府的可能,也強調一定在七點活動結束後,引領聲援群眾回到「精神堡壘」的立法院,呼籲警方不要趁機挑釁。而總統馬英九原定29日一早在總統府內對外的說明,記者會延到下午三點召開。
明日遊行將於下午一點在凱道集合,先靜坐兩小時,接著由各界代表、產業代表、罷課師生、藝文團體等在凱道舞台接力聲援。同時,中山南路也將規劃為「公民憲政大道」,邀請教育、人權、性別、勞工、居住權等NGO擺攤。活動預估超過十萬人,將派出300名以上糾察隊維持秩序。學生代表陳為廷說,光是台中就有80輛遊覽車要北上聲援,全球10幾個國際城市也將在同一時間串聯,隔海聲援。
針對近期馬政府回應服貿可以有監督機制,律師賴中強表示,從去年六月服貿公佈至今,他們一直堅持「監督法制化」,應由立法院立法監督,而不是行政院長江宜樺所宣稱的四階段行政命令。民間版兩岸締結條例草案也已經通過提案門檻。陳為廷則指責馬政府雖承諾可立法、可審查,但立法卻又不適用於服貿。馬政府依然迴避訴求,佔領立院至今12天,抗議民眾已經「退無可退」,一定要上街創造歷史。
                   ')
# Print the segmentation result for inspection.
input
  [1] "公民"   "團體"   "與"     "學生"   "代表"   "今天"   "召開"  
  [8] "330"    "凱"     "道"     "遊"     "行"     "行前"   "記者會"
 [15] "強調"   "本次"   "遊"     "行"     "秉"     "持"     "和平"  
 [22] "非暴力" "原則"   "將"     "從"     "立"     "法院"   "延伸"  
 [29] "到"     "凱"     "道"     "地球"   "公民"   "基金會" "執行"  
 [36] "長"     "李"     "根"     "政"     "否認"   "佔領"   "總統府"
 [43] "的"     "可能"   "也"     "強調"   "一"     "定"     "在"    
 [50] "七點"   "活動"   "結束"   "後"     "引"     "領"     "聲援"  
 [57] "群眾"   "回到"   "精神"   "堡壘"   "的"     "立"     "法院"  
 [64] "呼"     "籲"     "警方"   "不要"   "趁機"   "挑釁"   "而"    
 [71] "總統"   "馬"     "英"     "九"     "原定"   "29日"   "一"    
 [78] "早"     "在"     "總統府" "內"     "對"     "外"     "的"    
 [85] "說明"   "記者會" "延"     "到"     "下午"   "三點"   "召開"  
 [92] "明日"   "遊"     "行將"   "於"     "下午"   "一點"   "在"    
 [99] "凱"     "道"     "集合"   "先"     "靜坐"   "兩"     "小時"  
[106] "接"     "著"     "由"     "各界"   "代表"   "產業"   "代表"  
[113] "罷課"   "師生"   "藝"     "文"     "團體"   "等"     "在"    
[120] "凱"     "道"     "舞台"   "接力"   "聲援"   "同時"   "中"    
[127] "山"     "南"     "路"     "也"     "將"     "規劃"   "為"    
[134] "公民"   "憲政"   "大道"   "邀請"   "教育"   "人權"   "性別"  
[141] "勞工"   "居住"   "權"     "等"     "NGO"    "擺攤"   "活動"  
[148] "預估"   "超過"   "十萬"   "人"     "將"     "派出"   "300名" 
[155] "以上"   "糾察隊" "維持"   "秩序"   "學生"   "代表"   "陳"    
[162] "為"     "廷"     "說"     "光是"   "台"     "中"     "就"    
[169] "有"     "80輛"   "遊"     "覽"     "車"     "要"     "北上"  
[176] "聲援"   "全球"   "10幾個" "國際"   "城市"   "也"     "將"    
[183] "在"     "同"     "一時間" "串聯"   "隔"     "海"     "聲援"  
[190] "針對"   "近期"   "馬"     "政府"   "回應"   "服"     "貿"    
[197] "可以"   "有"     "監督"   "機制"   "律師"   "賴"     "中"    
[204] "強"     "表示"   "從"     "去年"   "六月"   "服"     "貿"    
[211] "公"     "佈"     "至今"   "他們"   "一直"   "堅持"   "監督"  
[218] "法制化" "應"     "由"     "立"     "法院"   "立法"   "監督"  
[225] "而"     "不"     "是"     "行政院" "長江"   "宜"     "樺"    
[232] "所"     "宣稱"   "的"     "四"     "階段"   "行政"   "命令"  
[239] "民間"   "版"     "兩岸"   "締結"   "條例"   "草案"   "也"    
[246] "已經"   "通過"   "提案"   "門檻"   "陳"     "為"     "廷"    
[253] "則"     "指責"   "馬"     "政府"   "雖"     "承諾"   "可"    
[260] "立法"   "可"     "審查"   "但"     "立法"   "卻"     "又"    
[267] "不"     "適用"   "於"     "服"     "貿"     "馬"     "政府"  
[274] "依然"   "迴"     "避"     "訴"     "求"     "佔領"   "立"    
[281] "院"     "至今"   "12天"   "抗議"   "民眾"   "已經"   "退"    
[288] "無可"   "退"     "一定"   "要"     "上街"   "創造"   "歷史"  
# Build the corpus: VectorSource() treats every element of `input`
# (one segmented token each) as a separate document.
d.corpus <- Corpus(VectorSource(input))

# Term-document matrix; wordLengths = c(2, Inf) discards terms shorter
# than two characters.
tdm <- TermDocumentMatrix(
  d.corpus,
  control = list(wordLengths = c(2, Inf))
)

# Total count of each term across all documents, most frequent first.
m1 <- as.matrix(tdm)
v <- rowSums(m1)
v <- sort(v, decreasing = TRUE)

# Frequency table of terms; display the ten most frequent.
d <- data.frame(word = names(v), freq = v)
head(d, n = 10)
         word freq
代表     代表    4
聲援     聲援    4
法院     法院    3
公民     公民    3
監督     監督    3
立法     立法    3
政府     政府    3
活動     活動    2
記者會 記者會    2
強調     強調    2