source("getFreqDir.R")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
univTable <- getFreqDir("univ")
head(univTable)
\[RTTR=\frac{types}{\sqrt{tokens}} \] ### kyoto1のGuiraud値を計算(小数点2桁)
## [1] 12.09
relative.univTable <- getFreqDir("univ", relative=TRUE)
head(relative.univTable)
head(relative.univTable*1000000)
hc <- hclust(dist(t(relative.univTable)))
plot(hc)
rect.hclust(hc, k=3, border="red")
hc <- hclust(dist(t(relative.univTable), method = "canberra"), method = "ward.D2")
plot(hc)
rect.hclust(hc, k=3, border="red")
library(proxy)
##
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
hc <- hclust(dist(t(relative.univTable), method = "cosine"), method = "ward.D2")
plot(hc)
rect.hclust(hc, k=3, border="red")
### Clusters based on a given number of groups
cutree(hc,k=3)
## kyoto1 kyoto2 osaka1 osaka2 osaka3 osaka4 tokyo1 tokyo2 waseda1 waseda2
## 1 1 1 1 1 1 2 2 3 3
hc <- hclust(dist(t(relative.univTable), method = "cosine"), method = "ward.D2")
plot(hc)
rect.hclust(hc, h=0.28, border="blue")
#cutree(hc,h=0.28)
hc <- hclust(dist(t(relative.univTable), method = "cosine"), method = "ward.D2")
plot(hc)
rect.hclust(hc, h=0.28, which=2, border="blue")
top15<-sort(rowSums(relative.univTable),decreasing=TRUE)[1:15]
names(top15)
## [1] "the" "and" "of" "to" "university"
## [6] "a" "in" "as" "is" "that"
## [11] "osaka" "with" "for" "research" "society"
TFtop15<-univTable[rownames(relative.univTable) %in% names(top15),]
tfidf.univTable <- getFreqDir("univ", tfidf = 1)
dim(tfidf.univTable)
## [1] 1487 10
head(tfidf.univTable)
#View(tfidf.univTable)
tfidf.top15<-sort(rowSums(tfidf.univTable),decreasing=TRUE)[1:15]
names(tfidf.top15)
## [1] "waseda" "osaka" "kyoto" "ou" "students"
## [6] "i" "programs" "dialogue" "tokyo" "need"
## [11] "english" "new" "universities" "researchers" "vision"
tfidf.TFtop15<-tfidf.univTable[rownames(tfidf.univTable) %in% names(tfidf.top15),]
#View(tfidf.TFtop15)
id = 6
tmp_freq <- tfidf.TFtop15[tfidf.TFtop15[id]>0,][,id]
tmp_wordLst <- rownames(tfidf.TFtop15[tfidf.TFtop15[id]>0,])
univName<-rep(colnames(tfidf.TFtop15)[id],length(tmp_wordLst))
cbind(univName, tmp_wordLst, tmp_freq)
## univName tmp_wordLst tmp_freq
## [1,] "osaka4" "universities" "2.49672460757113"
## [2,] "osaka4" "i" "4.63677427120352"
## [3,] "osaka4" "new" "2.85339955150986"
## [4,] "osaka4" "osaka" "10.0791980506157"
## [5,] "osaka4" "ou" "20.7232658369464"
## [6,] "osaka4" "vision" "5.49774439124493"
pairs <- c()
for(i in 1:6){
Freq <- tfidf.TFtop15[tfidf.TFtop15[i]>0,][,i]
Term <- rownames(tfidf.TFtop15[tfidf.TFtop15[i]>0,])
Univ<-rep(colnames(tfidf.TFtop15)[i],length(Term))
pairs <- rbind(pairs,cbind(Univ, Term, Freq))
}
head(pairs)
## Univ Term Freq
## [1,] "kyoto1" "kyoto" "16.094379124341"
## [2,] "kyoto1" "universities" "3.21007449544859"
## [3,] "kyoto1" "i" "2.49672460757113"
## [4,] "kyoto1" "new" "1.0700248318162"
## [5,] "kyoto1" "students" "1.0700248318162"
## [6,] "kyoto1" "dialogue" "2.40794560865187"
install.packages("igraph", dependencies = TRUE)
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
wng<-as.undirected(graph.data.frame(pairs))
plot(wng)
wng<-as.undirected(graph.data.frame(pairs))
deg<-degree(wng)
plot(wng, vertex.size=20*(deg/max(deg)))
wng<-as.undirected(graph.data.frame(pairs))
deg<-degree(wng)
E(wng)$weight<-as.numeric(as.numeric(pairs[,3]))
plot(wng, vertex.size=20*(deg/max(deg)), ,edge.width=10*E(wng)$weight/max(E(wng)$weight))