========================================================
source("getWordList.R")
freqTable<-getWordList("msg-osakau.txt")
freq <- data.frame(freqTable)
relative <- data.frame(freqTable / sum(freqTable))
freqMtx <- merge(freq, relative, all=T, by="wordLst")
## wordLst Freq.x Freq.y
## 1 1931 1 0.002012072
## 2 2003 1 0.002012072
## 3 2007 1 0.002012072
## 4 a 10 0.020120724
## 5 ability 1 0.002012072
## 6 academic 1 0.002012072
rownames(freqMtx)<-as.character(freqMtx[,1])
## wordLst Freq.x Freq.y
## 1931 1931 1 0.002012072
## 2003 2003 1 0.002012072
## 2007 2007 1 0.002012072
## a a 10 0.020120724
## ability ability 1 0.002012072
## academic academic 1 0.002012072
freqMtx<-freqMtx[-1]
colnames(freqMtx) <- c("raw", "relative")
## raw relative
## 1931 1 0.002012072
## 2003 1 0.002012072
## 2007 1 0.002012072
## a 10 0.020120724
## ability 1 0.002012072
## academic 1 0.002012072
freqMtx[1,]
freqMtx[,2]
freqMtx[2,2]
freqMtx$raw
freqMtx[freqMtx$raw==3,]
freqMtx[rownames(freqMtx)=="osaka",]
order(freqMtx$raw, decreasing = TRUE)
freqMtx<-freqMtx[order(freqMtx$raw, decreasing = TRUE),]
## raw relative
## 1931 1 0.002012072
## 2003 1 0.002012072
## 2007 1 0.002012072
## a 10 0.020120724
## ability 1 0.002012072
## academic 1 0.002012072
names(freqTable)
lapply(names(freqTable), nchar)
lapply(names(freqTable), paste, "@ osaka-u")
apply(freqMtx, 1, sum)
apply(freqMtx, 2, sum)
apply(freqMtx, c(1,2), sqrt)
apply(freqMtx, c(1,2), function(x) x*10)
library("wordcloud")
## Loading required package: RColorBrewer
wordcloud(rownames(freqMtx),freqMtx$raw)
wordcloud(rownames(freqMtx),freqMtx$raw,min.freq=2,colors=rainbow(10))
\[Frequency=\frac{K}{Rank^A} \] K,A: 定数
K=freqMtx[1,1]
A=0.8
rank <- seq(1:dim(freqMtx)[1])
zipf <- K/rank^A
## [1] 1.0000000 0.5743492 0.4152436 0.3298770 0.2759459 0.2384948
title="Zipf's Law"
xlabel="Rank"
ylabel="Frequency"
plot(zipf, log="xy", type="l",col="red" ,
xlim=c(1,nrow(freqMtx)),ylim=c(1,50),main=title, xlab=xlabel, ylab=ylabel)
par(new=T)
plot(rank,freqMtx[,1], xlim=c(1,nrow(freqMtx)), ylim=c(1,50),log="xy",pch=8, col="darkgreen", main=title, xlab=xlabel, ylab=ylabel)
配置:“bottomright”, “bottom”, “bottomleft”, “left”, “topleft”, “top”, “topright”, “right”, “center” ラベル lty: 線の種類 pch: プロットの種類
legend("topright",c("Frequency","Zipf's law"),lty=c(NA,1),pch=c(8,NA),col=c("darkgreen","red"))
library("manipulate")
title="Zipf's Law"
xlabel="Rank"
ylabel="Frequency"
A=0.8
rank <- seq(1:dim(freqMtx)[1])
manipulate(
{
zipf <- constK/rank^A
plot(zipf, log="xy", type="l",col="red" ,
xlim=c(1,nrow(freqMtx)),ylim=c(1,50),main=title, xlab=xlabel, ylab=ylabel)
par(new=T)
plot(rank,freqMtx[,1], xlim=c(1,nrow(freqMtx)), ylim=c(1,50),log="xy",pch=8, col="darkgreen", main=title, xlab=xlabel, ylab=ylabel)
legend("topright",c("Frequency","Zipf's law"),lty=c(NA,1),pch=c(8,NA),col=c(col="darkgreen",col="red"))
text(20, 50, "Frequency=K/Rank^A")
text(20, 40, paste("K=", constK))
text(20, 30, paste("A=", A))
}
, constK=slider(10,50, initial=freqMtx[1,1],step=2)
)
定数Aのスライダー 最小0, 最大1 ステップ:0.05 に設定
source("getFreqMtx.R")
res<-getFreqMtx("msg-osakau.txt")
head(res)
## raw relative
## the 32 0.06438632
## and 31 0.06237425
## of 30 0.06036217
## in 15 0.03018109
## university 15 0.03018109
## osaka 14 0.02816901