Lecture4: Manipulate関数

getFreq2015.Rを読み込む

source("getFreq2015.R")
単語頻度数分布
# Build the term/frequency table for the sample text using getFreq2015(),
# which is loaded from getFreq2015.R.
res <- getFreq2015("osaka-u.txt")
##                  term osaka-u
## the               the      33
## and               and      31
## of                 of      31
## university university      16
## in                 in      15

補足:ファイル名を抽出

# Derive a short label from a file path: strip the directory part, then the
# extension. basename() + tools::file_path_sans_ext() also cope with names
# that contain extra dots or have no extension, which picking the
# second-to-last piece of a "/" and "." split does not.
filename <- "osaka-u.txt"
label <- tools::file_path_sans_ext(basename(filename))
label
## [1] "osaka-u"
filename <- "Data/osaka-u.txt"
label <- tools::file_path_sans_ext(basename(filename))
label
## [1] "osaka-u"

単語出現頻度分布

# Rank-frequency scatter plot.
# The x coordinate is the row position (1..n), used as the rank. The row names
# of res are the terms themselves in the printed table, so they cannot be
# coerced to numeric x values.
# NOTE(review): this assumes res is ordered by descending frequency, as the
# printed head suggests - confirm against getFreq2015().
title <- "Word Frequency Distribution"
xlabel <- "Rank"
ylabel <- "Frequency"
plot(
  seq_len(nrow(res)), res[, 2],
  pch = 8, col = "darkgreen",
  main = title, xlab = xlabel, ylab = ylabel
)

両軸対数表示:log="xy"

# Same rank-frequency plot with both axes on a log scale (log = "xy").
# The row position is used as the rank; the row names of res hold the terms
# and are not numeric.
plot(
  seq_len(nrow(res)), res[, 2],
  xlim = c(1, nrow(res)), ylim = c(1, 100), log = "xy",
  pch = 8, col = "darkgreen",
  main = title, xlab = xlabel, ylab = ylabel
)

単語の文字数を調べる

# Show the first term of the table, then count its characters with nchar().
first_term <- res$term[1]
first_term
## [1] "the"
nchar(first_term)
## [1] 3

単語長度数集計:table

# Tabulate how many terms in res have each character length.
# The nchar() call is passed to table() directly so the resulting table
# carries no dimension name.
charlenF <- table(nchar(res[["term"]]))
print(charlenF)
## 
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 
##  4 14 19 34 33 25 30 27 28 13 11  3  4  1

単語長分布(Types)

# Word-length frequency distribution (one count per distinct term).
title <- "Word Length Frequency Distribution (Types)"
xlabel <- "Word Length"
ylabel <- "Frequency"
# The x axis has to reach the longest observed word length. length(charlenF)
# is only the number of distinct lengths, which is smaller than the longest
# length whenever some length in between never occurs.
xmax <- max(as.integer(names(charlenF)))
ymax <- max(charlenF)
plot(
  charlenF,
  type = "b", pch = 8, col = "orange",
  xlim = c(1, xmax), ylim = c(1, ymax),
  main = title, xlab = xlabel, ylab = ylabel
)

Zipfの法則

\[Frequency=\frac{K}{Rank^A} \] K,A: 定数

# Constants for Zipf's law, Frequency = K / Rank^A.
# K is the frequency stored in the first row of res; A is chosen by hand.
K <- res[1, 2]
A <- 0.75
print(K)
## the 
##  33
# Theoretical Zipf frequency K / rank^A for every rank 1..n.
# seq_len() yields 1..n and an empty sequence when res has no rows, where
# seq(1:0) would wrongly produce two ranks.
rank <- seq_len(nrow(res))
# Vectorised form of the per-rank computation. rep() repeats K together with
# its element name, so every value keeps the name that K carries, as the
# element-by-element version did.
zipf <- rep(K, length(rank)) / rank^A

zipfの結果抜粋

##       the       the       the       the       the       the       the 
## 33.000000 19.621917 14.476814 11.667262  9.869302  8.607965  7.668147 
##       the       the       the 
##  6.937395  6.350853  5.868322

グラフ図

# Draw the theoretical Zipf values as a red line on log-log axes.
# zipf is plotted against its index, which is the rank.
title <- "Zipf's Law"
xlabel <- "Rank"
ylabel <- "Frequency"
plot(
  zipf,
  type = "l", col = "red", log = "xy",
  xlim = c(1, nrow(res)), ylim = c(1, 100),
  main = title, xlab = xlabel, ylab = ylabel
)

頻度散布図とZipfの法則の理論式の重ね書き

# Overlay the observed frequencies on the figure that is already drawn.
# par(new = TRUE) makes the next plot() draw on top of the current figure
# instead of starting a fresh one; the axis limits and log setting repeat
# those of the Zipf curve so both layers share one coordinate system.
par(new = TRUE)
# The row position is used as the rank; the row names of res hold the terms
# and are not numeric.
plot(
  seq_len(nrow(res)), res[, 2],
  xlim = c(1, nrow(res)), ylim = c(1, 100), log = "xy",
  pch = 8, col = "darkgreen",
  main = title, xlab = xlabel, ylab = ylabel
)

凡例をつける: legend

配置: "bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right", "center" のいずれか。ラベル: 凡例に表示する文字列。lty: 線の種類。pch: プロット記号の種類。

# Legend in the top-right corner: the observed data is shown as a point
# symbol only (lty = NA), the Zipf curve as a line only (pch = NA).
legend(
  "topright",
  legend = c("Frequency", "Zipf's law"),
  lty = c(NA, 1),
  pch = c(8, NA),
  col = c("darkgreen", "red")
)

グラフ図

manipulate package

インタラクティブなプロット

library(manipulate)

Zipfの法則の理論値の色の選択

picker()関数

# Interactive plot: a picker control chooses the colour of the Zipf curve.
# The braced expression is re-evaluated whenever the control changes, with
# the selected value bound to zipfsColors.
title <- "Zipf's Law"
xlabel <- "Rank"
ylabel <- "Frequency"

manipulate(
  {
    # Theoretical curve in the colour currently selected.
    plot(
      zipf,
      log = "xy", type = "l", col = zipfsColors,
      xlim = c(1, nrow(res)), ylim = c(1, 100),
      main = title, xlab = xlabel, ylab = ylabel
    )
    # Draw the observed frequencies on top of the same figure.
    par(new = TRUE)
    # The row position is used as the rank; the row names of res hold the
    # terms and are not numeric.
    plot(
      seq_len(nrow(res)), res[, 2],
      xlim = c(1, nrow(res)), ylim = c(1, 100), log = "xy",
      pch = 8, col = "darkgreen",
      main = title, xlab = xlabel, ylab = ylabel
    )
    # The legend colour for the curve follows the picker selection.
    legend(
      "topright",
      legend = c("Frequency", "Zipf's law"),
      lty = c(NA, 1), pch = c(8, NA),
      col = c("darkgreen", zipfsColors)
    )
  },
  zipfsColors = picker(
    "red", "yellow", "green", "violet", "orange", "blue", "pink", "cyan"
  )
)

実習1

実際の値(*プロット)の色を選べるように変えてください。 初期値の色は“darkgreen”を指定 alt text

スライダー:定数Kの値を変化させる

# Interactive plot: a slider changes the constant K of Zipf's law
# (Frequency = K / Rank^A) while A stays fixed.
title <- "Zipf's Law"
xlabel <- "Rank"
ylabel <- "Frequency"

K <- res[1, 2]
A <- 0.75
K
# seq_len() yields 1..n and an empty sequence when res has no rows.
rank <- seq_len(nrow(res))

manipulate(
  {
    # Recompute the theoretical curve for the K currently on the slider.
    zipf <- constK / rank^A
    plot(
      zipf,
      log = "xy", type = "l", col = "red",
      xlim = c(1, nrow(res)), ylim = c(1, 100),
      main = title, xlab = xlabel, ylab = ylabel
    )
    # Draw the observed frequencies on top of the same figure.
    par(new = TRUE)
    # The row position is used as the rank; the row names of res hold the
    # terms and are not numeric.
    plot(
      seq_len(nrow(res)), res[, 2],
      xlim = c(1, nrow(res)), ylim = c(1, 100), log = "xy",
      pch = 8, col = "darkgreen",
      main = title, xlab = xlabel, ylab = ylabel
    )
    legend(
      "topright",
      legend = c("Frequency", "Zipf's law"),
      lty = c(NA, 1), pch = c(8, NA),
      col = c("darkgreen", "red")
    )
    # Annotate the figure with the formula and the constants in use.
    text(5, 85, "Frequency=K/Rank^A")
    text(5, 70, paste("K=", constK))
    text(5, 60, paste("A=", A))
  },
  # The slider starts at the frequency stored in the first row of res.
  constK = slider(10, 100, initial = res[1, 2], step = 2)
)

実習2:定数Aの値を変化させる

alt text