ui画面には以下のinput機能を搭載
1. 距離行列計算法の選択:"euclidean", "canberra", "manhattan"
2. クラスター間距離計算法の選択:"average", "complete", "ward"
3. 樹形図内長方形の数:2-6
source("getFreqDir.R")
### proxyパッケージのインストール
# Install proxy only when it is missing, so re-running these notes does not
# re-download the package (or fail when offline) on every run.
if (!requireNamespace("proxy", quietly = TRUE)) {
  install.packages("proxy")
}
# Attach proxy so that its functions can be called without a prefix.
library(proxy)
\[Corr(x,y)= \frac{\sum (x_{i}-\overline{x}) (y_{i}-\overline{y})}{\sqrt{\sum (x_{i}-\overline{x})^2\sum (y_{i}-\overline{y})^2}} \]
# Build the frequency table for the "testData" directory with the project
# helper getFreqDir() (presumably defined in the sourced getFreqDir.R).
tf <- getFreqDir("testData")
# Pairwise correlation between the columns of tf (test1, test2, test3).
res <- cor(tf)
# Show the correlation matrix rounded to two decimals.
round(res, digits = 2)
## test1 test2 test3
## test1 1.00 -0.29 -0.38
## test2 -0.29 1.00 0.80
## test3 -0.38 0.80 1.00
行と列を転置する
# Transposed table: test1-test3 become the rows, a-h become the columns.
t(tf)
## a b c d e f g h
## test1 3 4 13 0 7 0 0 0
## test2 2 4 2 0 1 11 7 0
## test3 2 0 3 1 1 9 7 4
# cor() works column-wise, so correlating the transposed table compares
# a-h with each other instead of test1-test3.
round(cor(t(tf)), digits = 2)
## a b c d e f g h
## a 1.00 0.50 1.00 -0.50 1.00 -0.99 -1.00 -0.50
## b 0.50 1.00 0.43 -1.00 0.50 -0.34 -0.50 -1.00
## c 1.00 0.43 1.00 -0.43 1.00 -1.00 -1.00 -0.43
## d -0.50 -1.00 -0.43 1.00 -0.50 0.34 0.50 1.00
## e 1.00 0.50 1.00 -0.50 1.00 -0.99 -1.00 -0.50
## f -0.99 -0.34 -1.00 0.34 -0.99 1.00 0.99 0.34
## g -1.00 -0.50 -1.00 0.50 -1.00 0.99 1.00 0.50
## h -0.50 -1.00 -0.43 1.00 -0.50 0.34 0.50 1.00
# Set up an empty scatter frame (type = "n") for the first two columns of tf,
# using the column names as axis labels.
plot(
  tf[, 1], tf[, 2],
  type = "n",
  xlab = colnames(tf)[1],
  ylab = colnames(tf)[2]
)
# Write each row name at its (column 1, column 2) position instead of a point.
text(tf[, 1], tf[, 2], labels = rownames(tf))
# Correlation between the two plotted columns.
cor(tf[, 1], tf[, 2])
## [1] -0.2876135
# Annotate the top margin (side = 3) with the rounded correlation.
mtext(paste("corr = ", round(cor(tf[, 1], tf[, 2]), 2)), side = 3)
# Print res rounded to two decimals.
# NOTE(review): the output below is an 8 x 8 matrix for the university texts,
# so res must have been recomputed by code that is not visible in this section.
round(res, digits = 2)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## hiroshima 1.00 0.63 0.71 0.67 0.67 0.65 0.60 0.71
## kufs 0.63 1.00 0.80 0.62 0.71 0.76 0.74 0.79
## kyoto 0.71 0.80 1.00 0.75 0.82 0.87 0.81 0.86
## osaka1 0.67 0.62 0.75 1.00 0.84 0.80 0.71 0.75
## osaka2 0.67 0.71 0.82 0.84 1.00 0.89 0.80 0.80
## osaka3 0.65 0.76 0.87 0.80 0.89 1.00 0.84 0.81
## tokyo 0.60 0.74 0.81 0.71 0.80 0.84 1.00 0.76
## waseda 0.71 0.79 0.86 0.75 0.80 0.81 0.76 1.00
# Attach proxy. As the start-up messages below show, this masks as.dist and
# dist from stats and as.matrix from base.
library("proxy")
##
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
# Rebuild the frequency table for the "testData" directory.
tf <- getFreqDir("testData")
行と列を転置する
# Similarity between test1-test3: simil() compares rows, so tf is transposed
# first. With no method given, the values agree with the cor(tf) result.
corr <- simil(t(tf))
round(corr, 2)
## test1 test2
## test2 -0.29
## test3 -0.38 0.80
# Same computation, with the diagonal included in the printed output.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# NOTE(review): the diagonal prints as 0.00 below; that looks like a display
# placeholder rather than the self-similarity - confirm against the proxy docs.
corr <- simil(t(tf), diag = TRUE)
round(corr, 2)
## test1 test2 test3
## test1 0.00
## test2 -0.29 0.00
## test3 -0.38 0.80 0.00
# Print the untransposed table for reference (rows a-h, columns test1-test3).
tf
## test1 test2 test3
## a 3 2 2
## b 4 4 0
## c 13 2 3
## d 0 0 1
## e 7 1 1
## f 0 11 9
## g 0 7 7
## h 0 0 4
# Without transposing, simil() compares the rows a-h with each other.
# NOTE(review): these values differ from round(cor(t(tf)), 2) shown earlier
# (e.g. a-b is 0.84 here vs 0.50 there). That is consistent with tf being a
# data frame and proxy falling back to a different default measure (Gower)
# for data frames - confirm; if correlation was intended, pass a matrix or
# set the method explicitly.
corr <- simil(x = tf)
round(corr, 2)
## a b c d e f g
## b 0.84
## c 0.71 0.60
## d 0.83 0.74 0.53
## e 0.83 0.80 0.74 0.79
## f 0.39 0.35 0.17 0.37 0.22
## g 0.59 0.55 0.37 0.57 0.42 0.80
## h 0.79 0.63 0.57 0.89 0.68 0.48 0.68
\[Cos(x,y)= \frac{\sum x_{i} y_{i}}{\sqrt{\sum x_{i}^2\sum y_{i}^2}} \]
# Cosine similarity between test1-test3 (rows of the transposed table).
simil(
  t(tf),
  method = "cosine"
)
## test1 test2
## test2 0.2526633
## test3 0.2628980 0.8973604
# Print res rounded to two decimals.
# NOTE(review): the lower-triangular output below is for the university
# texts, so res was presumably reassigned by code not visible in this section.
round(res, 2)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo
## kufs 0.65
## kyoto 0.73 0.81
## osaka1 0.68 0.65 0.77
## osaka2 0.69 0.73 0.83 0.84
## osaka3 0.66 0.77 0.87 0.81 0.90
## tokyo 0.62 0.75 0.81 0.72 0.80 0.84
## waseda 0.73 0.80 0.87 0.77 0.81 0.82 0.77
# Build the frequency table for the "univ" directory.
tf <- getFreqDir("univ")
# Cluster the columns of tf: transpose so each column becomes a row, compute
# pairwise distances with dist()'s default method, and pass them to hclust()
# with its default linkage.
hc <- hclust(d = dist(t(tf)))
# Draw the dendrogram and outline a three-cluster cut in red.
plot(hc)
rect.hclust(hc, k = 3, border = "red")
# Build the frequency table for the "univ" directory.
tf <- getFreqDir("univ")
# Cluster the columns of tf using Canberra distances and Ward linkage.
# hclust() renamed the old "ward" method to "ward.D" and reports this with a
# message each time "ward" is used; asking for "ward.D" directly gives the
# same clustering without the message. "ward.D2" is the newer variant
# mentioned by that message.
hc <- hclust(dist(t(tf), method = "canberra"), method = "ward.D")
# Draw the dendrogram and outline a three-cluster cut in red.
plot(hc)
rect.hclust(hc, k = 3, border = "red")