library(wordcloud)
## Loading required package: RColorBrewer
library(proxy)
##
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
source("getFreqDir.R")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
univTable <- getFreqDir("univ")
head(univTable)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## to 11 17 26 11 17 11 15 35
## and 7 18 38 15 26 31 22 37
## the 7 25 36 16 31 32 50 39
## university 6 5 16 18 21 15 9 22
## hiroshima 5 0 0 0 0 0 0 0
## of 5 18 35 10 26 30 38 34
colnames(univTable)[6]
## [1] "osaka3"
head(univTable[,6])
## [1] 11 31 32 15 0 30
head(univTable$osaka3)
## [1] 11 31 32 15 0 30
sData <- univTable$osaka3[1:20]
sLabel <- rownames(univTable)[1:20]
wordcloud(sLabel,sData,colors=rainbow(10))
comparison.cloud(univTable[,c(4,6)],max.words=20)
colnames(univTable)
## [1] "hiroshima" "kufs" "kyoto" "osaka1" "osaka2" "osaka3"
## [7] "tokyo" "waseda"
comparison.cloud(univTable[,c(T,F,F,T,F,F,T,F)],max.words=20)
head(brewer.pal.info[])
## maxcolors category colorblind
## BrBG 11 div TRUE
## PiYG 11 div TRUE
## PRGn 11 div TRUE
## PuOr 11 div TRUE
## RdBu 11 div TRUE
## RdGy 11 div FALSE
rownames(brewer.pal.info[])
## [1] "BrBG" "PiYG" "PRGn" "PuOr" "RdBu" "RdGy"
## [7] "RdYlBu" "RdYlGn" "Spectral" "Accent" "Dark2" "Paired"
## [13] "Pastel1" "Pastel2" "Set1" "Set2" "Set3" "Blues"
## [19] "BuGn" "BuPu" "GnBu" "Greens" "Greys" "Oranges"
## [25] "OrRd" "PuBu" "PuBuGn" "PuRd" "Purples" "RdPu"
## [31] "Reds" "YlGn" "YlGnBu" "YlOrBr" "YlOrRd"
binfo<-brewer.pal.info[]
palets <-rownames(binfo[binfo$maxcolors>10,])
tmpPalet = palets[1]
binfo[rownames(binfo)==tmpPalet,]
## maxcolors category colorblind
## BrBG 11 div TRUE
(maxNum=binfo[rownames(binfo)==tmpPalet,][1])
## maxcolors
## BrBG 11
wordcloud(rownames(univTable),univTable$waseda,
colors=brewer.pal(as.integer(maxNum),tmpPalet))
tmpPalet = palets[3]
(maxNum=binfo[rownames(binfo)==tmpPalet,][1])
## maxcolors
## PRGn 11
wordcloud(rownames(univTable),univTable$waseda,
colors=brewer.pal(as.integer(maxNum),tmpPalet),min.freq=5)
\[Corr(x,y)= \frac{\sum (x_{i}-\overline{x}) (y_{i}-\overline{y})}{\sqrt{\sum (x_{i}-\overline{x})^2\sum (y_{i}-\overline{y})^2}} \]
tf1 <- getFreqDir("testData")
res <-cor(tf1)
round(res,2)
## test1 test2 test3 test4
## test1 1.00 -0.29 -0.38 -0.35
## test2 -0.29 1.00 0.80 0.96
## test3 -0.38 0.80 1.00 0.94
## test4 -0.35 0.96 0.94 1.00
corr <- simil(t(tf1))
round(corr, 2)
## test1 test2 test3
## test2 -0.29
## test3 -0.38 0.80
## test4 -0.35 0.96 0.94
corr <- simil(t(tf1), diag=T)
round(corr, 2)
## test1 test2 test3 test4
## test1 NA
## test2 -0.29 NA
## test3 -0.38 0.80 NA
## test4 -0.35 0.96 0.94 NA
\[Cos(x,y)= \frac{\sum x_{i} y_{i}}{\sqrt{\sum x_{i}^2\sum y_{i}^2}} \]
simil(t(tf1), method="cosine")
## test1 test2 test3
## test2 0.2526633
## test3 0.2628980 0.8973604
## test4 0.2643935 0.9764635 0.9714201
tf2 <- getFreqDir("univ")
res <-simil(t(tf2), method="cosine")
round(res,2)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo
## kufs 0.65
## kyoto 0.73 0.81
## osaka1 0.68 0.65 0.77
## osaka2 0.69 0.73 0.83 0.84
## osaka3 0.66 0.77 0.88 0.81 0.90
## tokyo 0.62 0.75 0.81 0.72 0.80 0.84
## waseda 0.73 0.80 0.87 0.77 0.81 0.82 0.77
res2<-simil(t(tf2), method="cosine", diag=T)
res2<-as.matrix(res2)
res2[is.na(res2)] <- 1
round(res2,2)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## hiroshima 1.00 0.65 0.73 0.68 0.69 0.66 0.62 0.73
## kufs 0.65 1.00 0.81 0.65 0.73 0.77 0.75 0.80
## kyoto 0.73 0.81 1.00 0.77 0.83 0.88 0.81 0.87
## osaka1 0.68 0.65 0.77 1.00 0.84 0.81 0.72 0.77
## osaka2 0.69 0.73 0.83 0.84 1.00 0.90 0.80 0.81
## osaka3 0.66 0.77 0.88 0.81 0.90 1.00 0.84 0.82
## tokyo 0.62 0.75 0.81 0.72 0.80 0.84 1.00 0.77
## waseda 0.73 0.80 0.87 0.77 0.81 0.82 0.77 1.00
hc <- hclust(dist(t(tf2)))
plot(hc)
#rect.hclust(hc, k=3, border="red")
hc <- hclust(dist(t(tf2), method = "canberra"), method = "ward.D2")
plot(hc)
#rect.hclust(hc, k=3, border="red")
freq<-tf$hiroshima[1:50]
label<-rownames(tf)[1:50]
*brewer.pal関数の第1引数は10で固定