source("getFreqDir.R")
tf <- getFreqDir("univ")
head(tf)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## to 11 17 26 11 17 11 15 35
## and 7 18 38 15 26 31 22 37
## the 7 25 36 16 31 32 52 39
## university 6 5 16 18 21 15 9 22
## hiroshima 5 0 0 0 0 0 0 0
## of 5 18 35 10 26 30 38 34
library(proxy)
##
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
行と列を転置する
corr <- simil(t(tf))
round(corr, 2)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo
## kufs 0.63
## kyoto 0.71 0.80
## osaka1 0.67 0.62 0.75
## osaka2 0.67 0.71 0.82 0.84
## osaka3 0.65 0.76 0.87 0.80 0.89
## tokyo 0.60 0.74 0.81 0.71 0.80 0.84
## waseda 0.71 0.79 0.86 0.75 0.80 0.81 0.76
#tf <- getFreqDir("univ")
hc <- hclust(dist(t(tf)))
plot(hc)
rect.hclust(hc, k=3, border="red")
#tf <- getFreqDir("univ")
mds <- cmdscale(dist(t(tf)),2)
plot(mds,type="n")
text(mds,rownames(mds))
mds <- cmdscale(dist(t(tf), method = "canberra"),2)
plot(mds,type="n")
text(mds,rownames(mds))
mds <- cmdscale(dist(t(tf), method = "Manhattan"),2)
plot(mds,type="n")
text(mds,rownames(mds))
mds <- cmdscale(dist(t(tf), method = "cosine"),2)
plot(mds,type="n")
text(mds,rownames(mds))
#dist(t(tf), method = "cosine")
#simil(t(tf), method="cosine")
test1<-"I am innocent of the allegations made against me, Ghosn told the Tokyo District Court in English, adding that he has been wrongly accused and unfairly detained. "
substr(test1,1,4)
## [1] "I am"
size=4
len<-nchar(test1)-size+1
ngramLst <- c()
for(i in 1:nchar(test1)){
ngramLst<-rbind(ngramLst,(substr(test1,i,i+size-1)))
}
head(ngramLst)
## [,1]
## [1,] "I am"
## [2,] " am "
## [3,] "am i"
## [4,] "m in"
## [5,] " inn"
## [6,] "inno"
wordLst <- strsplit(test1, "[[:space:]]|[[:punct:]]")
wordLst <- unlist(wordLst)
wordLst <- tolower(wordLst)
wordLst <- wordLst[wordLst != ""]
head(wordLst)
## [1] "i" "am" "innocent" "of" "the"
## [6] "allegations"
size=2
start=1
step=2
wordLst[start:(start+size-1)]
## [1] "i" "am"
strLst<-c()
len<-length(wordLst)-size +step
for(i in seq(1, len ,step)) {
strLst<-rbind(strLst,wordLst[i:(i+size-1)])
}
strLst
## [,1] [,2]
## [1,] "i" "am"
## [2,] "innocent" "of"
## [3,] "the" "allegations"
## [4,] "made" "against"
## [5,] "me" "ghosn"
## [6,] "told" "the"
## [7,] "tokyo" "district"
## [8,] "court" "in"
## [9,] "english" "adding"
## [10,] "that" "he"
## [11,] "has" "been"
## [12,] "wrongly" "accused"
## [13,] "and" "unfairly"
## [14,] "detained" NA
str<-strLst[1,]
pLst <- c()
for(i in 1:(length(str)-1)){
for(j in (i+1):length(str)){
if(!is.na(str[j])){
tmp<-sort(c(str[i],str[j]))
pLst<-rbind(pLst,tmp)
}
}
}
pLst
## [,1] [,2]
## tmp "am" "i"
source("netwkPairs.R")
getPairs(strLst[1,])
## [,1] [,2]
## tmp "am" "i"
getPairs(strLst[10,])
## [,1] [,2]
## tmp "he" "that"
getPairsLst(strLst)
## [,1] [,2]
## tmp "am" "i"
## tmp "innocent" "of"
## tmp "allegations" "the"
## tmp "against" "made"
## tmp "ghosn" "me"
## tmp "the" "told"
## tmp "district" "tokyo"
## tmp "court" "in"
## tmp "adding" "english"
## tmp "he" "that"
## tmp "been" "has"
## tmp "accused" "wrongly"
## tmp "and" "unfairly"
getNstr(wordLst,size,step)
## [,1] [,2]
## [1,] "i" "am"
## [2,] "innocent" "of"
## [3,] "the" "allegations"
## [4,] "made" "against"
## [5,] "me" "ghosn"
## [6,] "told" "the"
## [7,] "tokyo" "district"
## [8,] "court" "in"
## [9,] "english" "adding"
## [10,] "that" "he"
## [11,] "has" "been"
## [12,] "wrongly" "accused"
## [13,] "and" "unfairly"
## [14,] "detained" NA
pFreq<-getPairsFreq(strLst)
head(pFreq)
## Term1 Term2 Freq
## 1 am i 1
## 2 innocent of 1
## 3 allegations the 1
## 4 against made 1
## 5 ghosn me 1
## 6 the told 1
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
wng<-as.undirected(graph.data.frame(pFreq))
plot(wng)
filename <- "Ghosn.txt"
txt<-readLines(filename)
txt<- splitWdsEn(txt)
head(txt)
## [1] "embattled" "former" "nissan" "motor" "co" "chairman"
size=4
step=2
strLst<-getNstr(txt,size,step)
head(strLst)
## [,1] [,2] [,3] [,4]
## [1,] "embattled" "former" "nissan" "motor"
## [2,] "nissan" "motor" "co" "chairman"
## [3,] "co" "chairman" "carlos" "ghosn"
## [4,] "carlos" "ghosn" "denied" "allegations"
## [5,] "denied" "allegations" "of" "financial"
## [6,] "of" "financial" "misconduct" "tuesday"
pFreq<-getPairsFreq(strLst)
head(pFreq)
## Term1 Term2 Freq
## 1 embattled former 1
## 2 embattled nissan 1
## 3 embattled motor 1
## 4 former nissan 1
## 5 former motor 1
## 6 motor nissan 5
pFreq_s<-pFreq[pFreq$Freq>=7,]
head(pFreq_s)
## Term1 Term2 Freq
## 80 allegations the 7
## 93 ghosn told 10
## 94 ghosn the 9
## 95 the told 8
## 99 district the 7
## 100 district tokyo 9
wng<-as.undirected(graph.data.frame(pFreq_s))
plot(wng)
wng<-as.undirected(graph.data.frame(pFreq_s))
E(wng)$weight<-pFreq_s$Freq
deg<-degree(wng)
plot(wng,edge.width=E(wng)$weight,vertex.size=30*(deg/max(deg)))
filename <- "osaka-u_ja_wakati.txt"
txt<-readLines(filename)
txt<- splitWdsEn(txt)
head(txt)
## [1] "この" "たび" "大阪大学" "第" "18" "代"
size=4
step=2
strLst<-getNstr(txt,size,step)
head(strLst)
## [,1] [,2] [,3] [,4]
## [1,] "この" "たび" "大阪大学" "第"
## [2,] "大阪大学" "第" "18" "代"
## [3,] "18" "代" "総長" "に"
## [4,] "総長" "に" "就任" "いたし"
## [5,] "就任" "いたし" "まし" "た"
## [6,] "まし" "た" "西尾" "章治郎"
pFreq<-getPairsFreq(strLst)
head(pFreq)
## Term1 Term2 Freq
## 1 この たび 1
## 2 この 大阪大学 1
## 3 この 第 1
## 4 たび 大阪大学 1
## 5 たび 第 1
## 6 大阪大学 第 2
pFreq_s<-pFreq[pFreq$Freq>=3,]
head(pFreq_s)
## Term1 Term2 Freq
## 26 た まし 4
## 36 です 大阪大学 3
## 54 の 政財界 3
## 56 の 強い 4
## 59 の を 6
## 72 に 年 3
#par(family = "HiraKakuProN-W3")
wng<-as.undirected(graph.data.frame(pFreq_s))
plot(wng)
wng<-as.undirected(graph.data.frame(pFreq_s))
E(wng)$weight<-pFreq_s$Freq
deg<-degree(wng)
plot(wng,edge.width=E(wng)$weight,vertex.size=30*(deg/max(deg)))
install.packages("d3Network")
d3SimpleNetwork(net, width=800, height=500,
standAlone=FALSE, opacity = 1.0,
charge = -200, fontsize = as.numeric(input$sel_fontsize),
parentElement = "#d3networkPlot")
library(shiny)
library(d3Network)
runApp("app_netwk")