https://rpubs.com/koyobib/bm04 の続き
library(readr)
library(openxlsx)
library(stringr)
library(stringdist)
library(dplyr)
library(RMeCab)
library(igraph)
RMeCabのインストール方法をいろいろ調べてみたけど,うまく動かない。そこで,MeCabをソースから以下のようにインストールしてみた。
`tar xf mecab-0.996.tar.gz` では展開できなかった。
cd ~/Downloads
cd mecab-0.996
./configure --with-charset=utf8
make
sudo make install
cd ~/Downloads
cd mecab-ipadic-2.7.0-20070801
./configure --with-charset=utf8
make
sudo make install
あとは,このページに書いてある方法で,RMeCabをインストールすればよい。homebrewでMeCabをインストールすると,RMeCabがMeCabを見つけられないようだ。
# Read the two CSV files under ./JJEP and stack them into one data frame.
# stringsAsFactors = FALSE is spelled out because read.csv() defaulted to TRUE
# before R 4.0.0; a factor `ti` column would make write() below emit integer
# level codes (five per line) instead of the text itself.
jjep.49.65 <- read.csv("./JJEP/49_65.csv", stringsAsFactors = FALSE)
jjep.66.70 <- read.csv("./JJEP/66_70.csv", stringsAsFactors = FALSE)
jjep.49.70 <- dplyr::bind_rows(jjep.49.65, jjep.66.70)
# Save the combined table while we are at it.
write_csv(jjep.49.70, "./JJEP/49_70.csv")
# Dump the `ti` column (presumably article titles -- confirm against the CSV)
# as plain text, one entry per line. as.character() guarantees write() takes
# its one-column character path whatever type the column was read as.
write(as.character(jjep.49.70$ti), file = "./JJEP/49_70_ti.txt")
# Run RMeCab's docDF() on the text file, restricted to nouns ("名詞").
jjep.49.70.ti.n1 <- docDF(
  "./JJEP/49_70_ti.txt",
  type = 1,
  pos = c("名詞")
)
# Console output recorded when this was originally run:
## file_name = ./JJEP/49_70_ti.txt opened
## number of extracted terms = 1404
## now making a data frame. wait a while!

# Show the result as an interactive table, rows sorted by the 4th column in
# descending order.
DT::datatable(
  jjep.49.70.ti.n1[order(jjep.49.70.ti.n1[[4]], decreasing = TRUE), ]
)
# CSV export kept for reference (disabled); the xlsx export below is used.
# write.csv(jjep.49.70.ti.n1, file = "./Res/49_70_ti_n1.csv")
write.xlsx(jjep.49.70.ti.n1, file = "./Res/49_70_ti_n1.xlsx")
ストップワードを設定
# Stop words: tokens that are filtered out of the n-gram table via %in%.
# Element order and repeated entries (e.g. "," and "示唆" occur more than once)
# are kept exactly as originally written; repeats are harmless for %in%.
stopword <- c(
  # symbols, markup fragments and single Latin letters
  ",", ",", ")", "(", "</", "i", ">=", ",(", "(<", ",「",
  ">", ":", ".", "~", ";", "/", "I",
  # Japanese words (with a couple of stray symbols in between)
  "結果", "こと", "研究", "分析", "示唆", "明らか", "検討", "可能", "性", "質問",
  "ため", "の", "これら", "群", "必要", "それ", "行い", "これ", "ところ", "それぞれ",
  "いずれ", "それら", "示唆", "多く", "&", "うち", "論文", "等", "とき", "的",
  ",", "以上", "対象", "調査", "者", "感", "記述", "先行", "面", "よう",
  "さ", "度", "位置付け", "有意", "支持", "事前", "事後", "テスト", "仮説", "問題",
  "点", "測定", "尺度", "型", "相関", "係数", "概念的", "因子", "平均", "教育",
  "心理", "学", "もの",
  # bracket / quote combinations (note the leading space in the last entry)
  "・", ",『", "),", "-", "<", "(「", ")「", " )『",
  # numerals "0" through "10", then two specific numbers
  as.character(0:10),
  "15", "60",
  # single capital letters
  "A", "B"
)
# Run RMeCab's NgramDF() on the text file, restricted to nouns ("名詞").
jjep.49.70.ti.cooc.1 <- NgramDF(
  "./JJEP/49_70_ti.txt",
  type = 1,
  pos = c("名詞")
)
# Console output recorded when this was originally run:
## file = ./JJEP/49_70_ti.txt Ngram = 2

# Keep only the rows in which neither Ngram1 nor Ngram2 is a stop word.
jjep.49.70.ti.cooc.2 <- jjep.49.70.ti.cooc.1[
  with(jjep.49.70.ti.cooc.1, !(Ngram1 %in% stopword | Ngram2 %in% stopword)), ,
  drop = FALSE
]
# Keep rows with a positive frequency; which() drops NA comparisons, matching
# what subset() does.
jjep.49.70.ti.cooc.3 <- jjep.49.70.ti.cooc.2[
  which(jjep.49.70.ti.cooc.2$Freq > 0), ,
  drop = FALSE
]
# Interactive table, rows sorted by the 3rd column in descending order.
DT::datatable(
  jjep.49.70.ti.cooc.3[order(jjep.49.70.ti.cooc.3[[3]], decreasing = TRUE), ]
)
# Build and draw an undirected graph from the filtered n-gram table.
library(igraph)
# Drop every row in which either member of the pair is a stop word ...
jjep.49.70.ti.cooc.21 <- subset(
  jjep.49.70.ti.cooc.1,
  !(Ngram1 %in% stopword) & !(Ngram2 %in% stopword)
)
# ... then keep only pairs with Freq greater than 3.
jjep.49.70.ti.cooc.22 <- subset(jjep.49.70.ti.cooc.21, Freq > 3)
# graph_from_data_frame() is the current name of graph.data.frame(), which is
# deprecated as of igraph 2.0. The first two columns give the edge endpoints;
# any further columns are stored as edge attributes.
jjep.49.70.ti.g <- graph_from_data_frame(
  jjep.49.70.ti.cooc.22,
  directed = FALSE
)
# vertex.label.family selects a font able to render the Japanese labels
# ("HiraKakuProN-W3" is a macOS font name; adjust on other platforms).
plot(
  jjep.49.70.ti.g,
  vertex.size = 3,
  vertex.label.cex = 1.5,
  vertex.label.family = "HiraKakuProN-W3",
  vertex.label = V(jjep.49.70.ti.g)$name,
  vertex.color = "yellow"
)