Lecture11: ネットワーク描画
実習内容
Lecture09で作成したTF-IDF重みづけ単語文書行列を利用して、記事ID-単語ペア情報の作成し、その結果をネットワーク(グラフ図)として描画する
パッケージのインストール
install.packages("igraph", dependencies = TRUE)
install.packages("networkD3", dependencies = TRUE)
パッケージの読み込み
library(igraph)
library(networkD3)
オンライン記事のTerm-Document Matrixの作成(cf. Lecture09)
自作関数ファイルの読み込み
source("func4lec09.R")
分析テキストURL
article_urls <- c()
article_urls <- readLines("Lec09_ArticleURL_info")
Term-Document Matrixの作成
contents <- lapply(article_urls, getArticleContent)
tmMtx <- getDocumentTermMTX(contents, term="lemma")
Processed document 10 of 12
相対頻度行列 (frequency per thousand)
relative_tmMtx <- sweep(tmMtx, MARGIN = 2, colSums(tmMtx), FUN = "/")*1000
文書単語行列(相対頻度 & TF-IDF)
文書頻度(Dcument Frequency:DF)
DF<-apply(tmMtx, 1, function(x) length(x[x>0]))
文書数
(N<-ncol(tmMtx))
[1] 12
TF-IDF
\[w=tf*log(\frac{N}{df}) \]
relative_tfidf <- relative_tmMtx*(log(N/DF))
全テキストの上位15最頻単語の抽出
記事ID: 3
id = 3
min.freq = 0
top_Num = 15
(top_rowSum <-sort(rowSums(relative_tfidf),decreasing=TRUE)[1:top_Num])
he Okayama Leicester Ishiba cancer intestine Miura yen party
97.96557 73.90057 64.71111 58.23182 57.12429 57.12429 54.41401 53.00304 51.86325
Osaka item they gene building League
51.53079 50.16594 49.31568 47.60358 44.65349 42.73060
記事ID-単語ペア情報の作成
記事ID: 3
tfidfMtx.topNum<-relative_tfidf[rownames(relative_tfidf) %in% names(top_rowSum),]
tfidfMtx.topNum
tmp_freq <- tfidfMtx.topNum[tfidfMtx.topNum[id]>min.freq,][,id]
tmp_wordLst <- rownames(tfidfMtx.topNum[tfidfMtx.topNum[id]>min.freq,])
article_id<-rep(colnames(tfidfMtx.topNum)[id],length(tmp_wordLst))
cbind(article_id, tmp_wordLst, tmp_freq)
article_id tmp_wordLst tmp_freq
[1,] "3" "he" "46.691665992208"
[2,] "3" "Ishiba" "46.2098120373297"
[3,] "3" "party" "18.4839248149319"
記事ID-単語ペア情報の作成
12記事
pairs <- c()
for(i in 1:12){
Freq <- tfidfMtx.topNum[tfidfMtx.topNum[i]>min.freq,][,i]
Term <- rownames(tfidfMtx.topNum[tfidfMtx.topNum[i]>min.freq,])
ArticleId<-rep(colnames(tfidfMtx.topNum)[i],length(Term))
pairs <- rbind(pairs,cbind(ArticleId, Term, Freq))
}
length(pairs)
[1] 108
ネットワーク描画
igraphで描画
wng<-as.undirected(graph.data.frame(pairs))
plot(wng)

ノード(node)の大きさを調整
wng<-as.undirected(graph.data.frame(pairs))
deg<-degree(wng)
plot(wng, vertex.size=20*(deg/max(deg)))

エッジ幅(重みづけ値)とノードの大きさを調整
wng<-as.undirected(graph.data.frame(pairs))
deg<-degree(wng)
E(wng)$weight<-as.numeric(as.numeric(pairs[,3]))
plot(wng, vertex.size=20*(deg/max(deg)), edge.width=10*E(wng)$weight/max(E(wng)$weight))

ノードの形状と色を変更
#Set default shape & color
V(wng)$shape <- "circle"
V(wng)$color <- "orange"
#Set default shape & color for the first 12 nodes, or article IDs
V(wng)$shape[1:12] <- "square"
V(wng)$color[1:12] <- "lightblue"
plot(wng)

networkD3形式に変換
network_d3_format <- igraph_to_networkD3(wng)
network_d3_format$nodes$group <- 1
ネットワーク描画
forceNetwork(Links = network_d3_format$links, Nodes = network_d3_format$nodes,
Source = 'source', Target = 'target',
NodeID = 'name', Group = 'group', fontSize = 14)
LS0tCnRpdGxlOiAiTGVjMTE6IChGYWxsIDIwMjQpIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKZWRpdG9yX29wdGlvbnM6IAogIGNodW5rX291dHB1dF90eXBlOiBpbmxpbmUKLS0tCgojIExlY3R1cmUxMTog44ON44OD44OI44Ov44O844Kv5o+P55S7CiMjIOWun+e/kuWGheWuuQpgYGAKTGVjdHVyZTA544Gn5L2c5oiQ44GX44GfVEYtSURG6YeN44G/44Gl44GR5Y2Y6Kqe5paH5pu46KGM5YiX44KS5Yip55So44GX44Gm44CB6KiY5LqLSUQt5Y2Y6Kqe44Oa44Ki5oOF5aCx44Gu5L2c5oiQ44GX44CB44Gd44Gu57WQ5p6c44KS44ON44OD44OI44Ov44O844Kv77yI44Kw44Op44OV5Zuz77yJ44Go44GX44Gm5o+P55S744GZ44KLCmBgYAoKIyMg5Y+C6ICD6LOH5paZCi0gPGEgaHJlZj0iaHR0cHM6Ly9yLmlncmFwaC5vcmcvIiB0YXJnZXQ9Il9ibGFuayI+aWdyYXBoPC9hPiBwYWNrYWdlCi0gPGEgaHJlZj0iaHR0cHM6Ly9jaHJpc3RvcGhlcmdhbmRydWQuZ2l0aHViLmlvL25ldHdvcmtEMy8iIHRhcmdldD0iX2JsYW5rIj5uZXR3b3JrRDM8L2E+IHBhY2thZ2UKLSA8YSBocmVmPSJodHRwczovL2dpdGh1Yi5jb20vY2hyaXN0b3BoZXJnYW5kcnVkL25ldHdvcmtEMy90cmVlL21hc3RlciIgdGFyZ2V0PSJfYmxhbmsiPm5ldHdvcmtEMzwvYT5AZ2l0aHViCgojIyDjg5Hjg4PjgrHjg7zjgrjjga7jgqTjg7Pjgrnjg4jjg7zjg6sKYGBge3IsIGV2YWw9RkFMU0V9Cmluc3RhbGwucGFja2FnZXMoImlncmFwaCIsIGRlcGVuZGVuY2llcyA9IFRSVUUpCmluc3RhbGwucGFja2FnZXMoIm5ldHdvcmtEMyIsIGRlcGVuZGVuY2llcyA9IFRSVUUpCmBgYAoKIyMg44OR44OD44Kx44O844K444Gu6Kqt44G/6L6844G/CmBgYHtyfQpsaWJyYXJ5KGlncmFwaCkKbGlicmFyeShuZXR3b3JrRDMpCmBgYAoKIyMg44Kq44Oz44Op44Kk44Oz6KiY5LqL44GuVGVybS1Eb2N1bWVudCBNYXRyaXjjga7kvZzmiJDvvIhjZi4gTGVjdHVyZTA577yJCiMjIyDoh6rkvZzplqLmlbDjg5XjgqHjgqTjg6vjga7oqq3jgb/ovrzjgb8KYGBge3J9CnNvdXJjZSgiZnVuYzRsZWMwOS5SIikKYGBgCgojIyMg5YiG5p6Q44OG44Kt44K544OIVVJMCmBgYHtyfQphcnRpY2xlX3VybHMgPC0gYygpCmFydGljbGVfdXJscyA8LSByZWFkTGluZXMoIkxlYzA5X0FydGljbGVVUkxfaW5mbyIpCmBgYAoKIyMjIFRlcm0tRG9jdW1lbnQgTWF0cml444Gu5L2c5oiQCmBgYHtyfQpjb250ZW50cyA8LSBsYXBwbHkoYXJ0aWNsZV91cmxzLCBnZXRBcnRpY2xlQ29udGVudCkKdG1NdHggPC0gZ2V0RG9jdW1lbnRUZXJtTVRYKGNvbnRlbnRzLCB0ZXJtPSJsZW1tYSIpCmBgYAoKIyMjIOebuOWvvumgu+W6puihjOWIlyAoZnJlcXVlbmN5IHBlciB0aG91c2FuZCkKYGBge3J9CnJlbGF0aXZlX3RtTXR4IDwtIHN3ZWVwKHRtTXR4LCBNQVJHSU4gPSAyLCBjb2xTdW1zKHRtTXR4KSwgRlVOID0gIi8iKSoxMDAwCmBgYAoKIyMjIOaWh+abuOWNmOiqnuihjOWIlyjnm7jlr77poLvluqYgJiBURi1JREYpIAojIyMjIOaWh+abuOmgu+W6pihEY3VtZW50IEZyZXF1ZW5jeTpERikKYGBge3J9CkRGPC1hcHBseSh0bU10eCwgMSwgZnVuY3Rpb24oeCkgbGVuZ3RoKHhbeD4wXSkpCmBgYAoKIyMjIyDmlofmm7jmlbAKYGBge3J9CihOPC1uY29sKHRtTXR4KSkKYGBgCgojIyMgVEYtSURGCiQkdz10Zipsb2coXGZyYWN7Tn17ZGZ9KSAkJApgYGB7cn0KcmVsYXRpdmVfdGZpZGYgPC0gcmVsYXRpdmVfdG1NdHgqKGxvZyhOL0RGKSkKYGBgCgojIyDlhajjg4bjgq3jgrnjg4jjga7kuIrkvY0xNeacgOmgu+WNmOiqnuOBruaKveWHugojIyMg6KiY5LqLSUQ6IDMKYGBge3J9CmlkID0gMwptaW4uZnJlcSA9IDAKdG9wX051bSA9IDE1Cih0b3Bfcm93U3VtIDwtc29ydChyb3dTdW1zKHJlbGF0aXZlX3RmaWRmKSxkZWNyZWFzaW5nPVRSVUUpWzE6dG9wX051bV0pCmBgYAojIyMg6KiY5LqLSUQt5Y2Y6Kqe44Oa44Ki5oOF5aCx44Gu5L2c5oiQCiMjIyDoqJjkuotJRDogMwpgYGB7cn0KdGZpZGZNdHgudG9wTnVtPC1yZWxhdGl2ZV90ZmlkZltyb3duYW1lcyhyZWxhdGl2ZV90ZmlkZikgJWluJSBuYW1lcyh0b3Bfcm93U3VtKSxdCnRmaWRmTXR4LnRvcE51bQp0bXBfZnJlcSA8LSB0ZmlkZk10eC50b3BOdW1bdGZpZGZNdHgudG9wTnVtW2lkXT5taW4uZnJlcSxdWyxpZF0KdG1wX3dvcmRMc3QgPC0gcm93bmFtZXModGZpZGZNdHgudG9wTnVtW3RmaWRmTXR4LnRvcE51bVtpZF0+bWluLmZyZXEsXSkKYXJ0aWNsZV9pZDwtcmVwKGNvbG5hbWVzKHRmaWRmTXR4LnRvcE51bSlbaWRdLGxlbmd0aCh0bXBfd29yZExzdCkpCmNiaW5kKGFydGljbGVfaWQsIHRtcF93b3JkTHN0LCB0bXBfZnJlcSkKYGBgCgojIyMg6KiY5LqLSUQt5Y2Y6Kqe44Oa44Ki5oOF5aCx44Gu5L2c5oiQCiMjIyAxMuiomOS6iwpgYGB7cn0KcGFpcnMgPC0gYygpCmZvcihpIGluIDE6MTIpewogIEZyZXEgPC0gdGZpZGZNdHgudG9wTnVtW3RmaWRmTXR4LnRvcE51bVtpXT5taW4uZnJlcSxdWyxpXQogIFRlcm0gPC0gcm93bmFtZXModGZpZGZNdHgudG9wTnVtW3RmaWRmTXR4LnRvcE51bVtpXT5taW4uZnJlcSxdKQogIEFydGljbGVJZDwtcmVwKGNvbG5hbWVzKHRmaWRmTXR4LnRvcE51bSlbaV0sbGVuZ3RoKFRlcm0pKQogIHBhaXJzIDwtIHJiaW5kKHBhaXJzLGNiaW5kKEFydGljbGVJZCwgVGVybSwgRnJlcSkpCn0KbGVuZ3RoKHBhaXJzKQpgYGAKIyMjIFZpZXfjgafnorroqo0KYGBge3J9ClZpZXcocGFpcnMpCmBgYAoKIyMg44ON44OD44OI44Ov44O844Kv5o+P55S7CiMjIyBpZ3JhcGjjgafmj4/nlLsKYGBge3J9CnduZzwtYXMudW5kaXJlY3RlZChncmFwaC5kYXRhLmZyYW1lKHBhaXJzKSkKcGxvdCh3bmcpCmBgYAoKIyMjIOODjuODvOODiShub2RlKeOBruWkp+OBjeOBleOCkuiqv+aVtAoqIOODjuODvOODieOBruWkp+OBjeOBlTogZGVncmVlKOasoeaVsO+8iQpgYGB7cn0Kd25nPC1hcy51bmRpcmVjdGVkKGdyYXBoLmRhdGEuZnJhbWUocGFpcnMpKQpkZWc8LWRlZ3JlZSh3bmcpCnBsb3Qod25nLCB2ZXJ0ZXguc2l6ZT0yMCooZGVnL21heChkZWcpKSkKYGBgCgojIyMg44Ko44OD44K45bmF77yI6YeN44G/44Gl44GR5YCk77yJ44Go44OO44O844OJ44Gu5aSn44GN44GV44KS6Kq/5pW0CmBgYHtyfQp3bmc8LWFzLnVuZGlyZWN0ZWQoZ3JhcGguZGF0YS5mcmFtZShwYWlycykpCmRlZzwtZGVncmVlKHduZykKRSh3bmcpJHdlaWdodDwtYXMubnVtZXJpYyhhcy5udW1lcmljKHBhaXJzWywzXSkpCnBsb3Qod25nLCB2ZXJ0ZXguc2l6ZT0yMCooZGVnL21heChkZWcpKSwgZWRnZS53aWR0aD0xMCpFKHduZykkd2VpZ2h0L21heChFKHduZykkd2VpZ2h0KSkKYGBgCiMjIyDjg47jg7zjg4njga7lvaLnirbjgajoibLjgpLlpInmm7QKYGBge3J9CiNTZXQgZGVmYXVsdCBzaGFwZSAmIGNvbG9yClYod25nKSRzaGFwZSA8LSAiY2lyY2xlIgpWKHduZykkY29sb3IgPC0gIm9yYW5nZSIKCiNTZXQgZGVmYXVsdCBzaGFwZSAmIGNvbG9yIGZvciB0aGUgZmlyc3QgMTIgbm9kZXMsIG9yIGFydGljbGUgSURzClYod25nKSRzaGFwZVsxOjEyXSA8LSAic3F1YXJlIiAKVih3bmcpJGNvbG9yWzE6MTJdIDwtICJsaWdodGJsdWUiCgpwbG90KHduZykKYGBgCiMjIyBuZXR3b3JrRDPlvaLlvI/jgavlpInmj5sKYGBge3J9Cm5ldHdvcmtfZDNfZm9ybWF0IDwtIGlncmFwaF90b19uZXR3b3JrRDMod25nKQpuZXR3b3JrX2QzX2Zvcm1hdCRub2RlcyRncm91cCA8LSAxCmBgYAoKIyMjIOODjeODg+ODiOODr+ODvOOCr+aPj+eUuwpgYGB7cn0KZm9yY2VOZXR3b3JrKExpbmtzID0gbmV0d29ya19kM19mb3JtYXQkbGlua3MsIE5vZGVzID0gbmV0d29ya19kM19mb3JtYXQkbm9kZXMsIAogICAgICAgICAgICAgU291cmNlID0gJ3NvdXJjZScsIFRhcmdldCA9ICd0YXJnZXQnLCAKICAgICAgICAgICAgIE5vZGVJRCA9ICduYW1lJywgR3JvdXAgPSAnZ3JvdXAnLCBmb250U2l6ZSA9IDE0KQpgYGAKCgoKCgo=