準備
ディレクトリ” univ”から出現単語行列を作成
getFreqDir関数の読み込み
source("getFreqDir.R")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
univディレクトリ内の頻度表の作成
univTable <- getFreqDir("univ")
階層的クラスター分析
階層的クラスター分析: デフォルト値:complete法, euclidean距離))
hc <- hclust(dist(t(univTable)))
plot(hc)
rect.hclust(hc, k=3, border="red")

階層的クラスター分析: ward法 & キャンベラ距離
hc <- hclust(dist(t(univTable), method = "canberra"), method = "ward.D2")
plot(hc)
rect.hclust(hc, k=3, border="red")

階層的クラスター分析: ward法 & コサイン距離
hc <- hclust(dist(t(univTable), method = "cosine"), method = "ward.D2")
plot(hc)
rect.hclust(hc, k=3, border="red")
全テキストの上位15最頻単語の抽出
top15<-sort(rowSums(univTable),decreasing=TRUE)[1:15]
names(top15)
## [1] "the" "and" "of" "to" "university"
## [6] "in" "a" "as" "is" "that"
## [11] "with" "for" "osaka" "research" "society"
階層的クラスター分析(最頻上位15): ward法 & キャンベラ距離
TFtop15<-univTable[rownames(univTable) %in% names(top15),]
hc <- hclust(dist(t(TFtop15), method = "canberra"), method = "ward.D2")
plot(hc)
rect.hclust(hc, k=2, border="red")

単語間階層的クラスター分析(最頻上位15)
TFtop15<-univTable[rownames(univTable) %in% names(top15),]
hc <- hclust(dist(TFtop15, method = "canberra"), method = "ward.D2")
plot(hc)

wordcloud(複数文書比較)
library(wordcloud)
## Loading required package: RColorBrewer
比較文書列ラベル
colnames(univTable)
## [1] "kyoto1" "kyoto2" "osaka1" "osaka2" "osaka3" "osaka4" "tokyo1"
## [8] "tokyo2" "waseda1" "waseda2"
ncol(univTable)
## [1] 10
複数文書比較:comparison.cloud()の実行例1
comparison.cloud(univTable[,c(3,6)],max.words=20)

複数文書比較:comparison.cloud()の実行例2
comparison.cloud(univTable[,c(F,T,F,F,F,T,F,F,F,T)],max.words=20)

カラーパレット:RColorBrewer
head(brewer.pal.info[])
maxcolorsの値取得
rownames(brewer.pal.info[])
## [1] "BrBG" "PiYG" "PRGn" "PuOr" "RdBu" "RdGy"
## [7] "RdYlBu" "RdYlGn" "Spectral" "Accent" "Dark2" "Paired"
## [13] "Pastel1" "Pastel2" "Set1" "Set2" "Set3" "Blues"
## [19] "BuGn" "BuPu" "GnBu" "Greens" "Greys" "Oranges"
## [25] "OrRd" "PuBu" "PuBuGn" "PuRd" "Purples" "RdPu"
## [31] "Reds" "YlGn" "YlGnBu" "YlOrBr" "YlOrRd"
RColorBrewerの利用
binfo<-brewer.pal.info[]
palets <-rownames(binfo[binfo$maxcolors>10,])
tmpPalet = palets[1]
binfo[rownames(binfo)==tmpPalet,]
maxcolorsの値取得
(maxNum=binfo[rownames(binfo)==tmpPalet,][1])
wordcloud描画
wordcloud(rownames(univTable),univTable$osaka4,
colors=brewer.pal(as.integer(maxNum),tmpPalet))

パレットの変更
tmpPalet = palets[3]
(maxNum=binfo[rownames(binfo)==tmpPalet,][1])
wordcloud描画
wordcloud(rownames(univTable),univTable$osaka4,
colors=brewer.pal(as.integer(maxNum),tmpPalet),min.freq=4)
