========================================================
#sort(table(wordLst), decreasing=TRUE)
source("getFreqMtx2.R")
freqMtx<-getFreqMtx2("msg-osakau.txt")
#head(freqMtx)
freqMtx<-freqMtx[order(freqMtx$raw, decreasing = TRUE),]
head(freqMtx)
## term raw relative
## 204 the 32 0.06438632
## 20 and 31 0.06237425
## 140 of 30 0.06036217
## 108 in 15 0.03018109
## 221 university 15 0.03018109
## 144 osaka 14 0.02816901
source("getFreqMtx2.R")
freqMtx<-getFreqMtx2("msg-osakau.txt")
head(freqMtx)
## term raw relative
## 204 the 32 0.06438632
## 20 and 31 0.06237425
## 140 of 30 0.06036217
## 108 in 15 0.03018109
## 221 university 15 0.03018109
## 144 osaka 14 0.02816901
## wordcloud |
r library("wordcloud") |
## Loading required package: RColorBrewer |
### wordcloud関数 |
r wordcloud(freqMtx$term,freqMtx$raw) |
### wordcloud関数:色 |
r wordcloud(freqMtx$term,freqMtx$raw,min.freq=2,colors=rainbow(10)) |
## 条件文: if ### 関数selectFreq1の作成 |
r selectFreq1 <- function(Fmtx, relative=FALSE){ if (relative==TRUE){ res<-Fmtx[c(1,3)] }else{ res<-Fmtx[1:2] } return(res) } |
### 実行例 |
r selectFreq1(freqMtx) selectFreq1(freqMtx,relative=FALSE) selectFreq1(freqMtx,relative=TRUE) |
## 条件分岐: switch |
r selectFreq2 <- function(Fmtx, relaFlag=1){ res<-switch (relaFlag, Fmtx[1:2], Fmtx[c(1,3)] ) return(res) } |
### 実行例 |
r selectFreq2(freqMtx) selectFreq2(freqMtx,relaFlag=1) selectFreq2(freqMtx,relaFlag=2) |
## 条件分岐: switch(文字) |
r selectFreq3 <- function(Fmtx, fType="all"){ res<-switch (fType, "all" = Fmtx, "raw" = Fmtx[1:2], "relative" =Fmtx[c(1,3)] ) return(res) } |
### 実行例 |
r selectFreq3(freqMtx) selectFreq3(freqMtx, fType="raw") selectFreq3(freqMtx, fType="relative") |
dirName <- "testData"
files <- list.files(dirName)
files
## [1] "test1.txt" "test2.txt" "test3.txt" "test4.txt"
filesDir <- unlist(lapply(dirName, paste, files, sep = "/"))
filesDir
## [1] "testData/test1.txt" "testData/test2.txt" "testData/test3.txt"
## [4] "testData/test4.txt"
freqLst <- lapply(filesDir, getFreqMtx2)
#freqLst
rawfreqLst<-lapply(freqLst, selectFreq1)
#rawfreqLst
files
## [1] "test1.txt" "test2.txt" "test3.txt" "test4.txt"
strsplit(files[1],"\\.")
## [[1]]
## [1] "test1" "txt"
unlist(strsplit(files[1],"\\."))[1]
## [1] "test1"
fnames<-unlist(lapply(files, function(x) unlist(strsplit(x,"\\."))[1]))
fnames
## [1] "test1" "test2" "test3" "test4"
#fnames_b <-sapply(sapply(files, strsplit,"\\."),function(x)x[1])
#for(i in 1:length(rawfreqLst)) {colnames(rawfreqLst[[i]])[2]=fnames[i]}
tf <- rawfreqLst[[1]]
for (i in rawfreqLst[-1]) tf <- merge(tf, i, all = T, by = "term")
## Warning in merge.data.frame(tf, i, all = T, by = "term"): column names
## 'raw.x', 'raw.y' are duplicated in the result
tf
## term raw.x raw.y raw.x raw.y
## 1 a 3 2 2 4
## 2 b 4 4 NA 4
## 3 c 13 2 3 5
## 4 d NA NA 1 1
## 5 e 7 1 1 2
## 6 f NA 11 9 20
## 7 g NA 7 7 14
## 8 h NA NA 4 4
tf[is.na(tf)] <- 0
tf <- tf[order(as.vector(tf$term)), ]
row.names(tf) <- tf[, 1]
tf <- tf[-1]
colnames(tf) <- fnames
tf
## test1 test2 test3 test4
## a 3 2 2 4
## b 4 4 0 4
## c 13 2 3 5
## d 0 0 1 1
## e 7 1 1 2
## f 0 11 9 20
## g 0 7 7 14
## h 0 0 4 4
library(dplyr)
tf_b <- rawfreqLst[[1]]
for (i in rawfreqLst[-1]) tf_b <- full_join(tf_b, i, all = T, by = "term")
tf_b[is.na(tf_b)] <- 0
row.names(tf_b) <- tf_b[, 1]
tf_b <- tf_b[-1]
colnames(tf_b) <- fnames
tf_b
\[Frequency=\frac{K}{Rank^A} \] K,A: 定数
K=freqMtx[1,1]
A=0.8
rank <- seq(1:dim(freqMtx)[1])
zipf <- K/rank^A
## Warning in Ops.factor(K, rank^A): '/' not meaningful for factors
## [1] NA NA NA NA NA NA
title="Zipf's Law"
xlabel="Rank"
ylabel="Frequency"
plot(zipf, log="xy", type="l",col="red" ,
xlim=c(1,nrow(freqMtx)),ylim=c(1,50),main=title, xlab=xlabel, ylab=ylabel)
par(new=T)
plot(rank,freqMtx[,1], xlim=c(1,nrow(freqMtx)), ylim=c(1,50),log="xy",pch=8, col="darkgreen", main=title, xlab=xlabel, ylab=ylabel)
配置:“bottomright”, “bottom”, “bottomleft”, “left”, “topleft”, “top”, “topright”, “right”, “center” ラベル lty: 線の種類 pch: プロットの種類
legend("topright",c("Frequency","Zipf's law"),lty=c(NA,1),pch=c(8,NA),col=c("darkgreen","red"))
library("manipulate")
title="Zipf's Law"
xlabel="Rank"
ylabel="Frequency"
A=0.8
rank <- seq(1:dim(freqMtx)[1])
manipulate(
{
zipf <- constK/rank^A
plot(zipf, log="xy", type="l",col="red" ,
xlim=c(1,nrow(freqMtx)),ylim=c(1,50),main=title, xlab=xlabel, ylab=ylabel)
par(new=T)
plot(rank,freqMtx[,1], xlim=c(1,nrow(freqMtx)), ylim=c(1,50),log="xy",pch=8, col="darkgreen", main=title, xlab=xlabel, ylab=ylabel)
legend("topright",c("Frequency","Zipf's law"),lty=c(NA,1),pch=c(8,NA),col=c(col="darkgreen",col="red"))
text(20, 50, "Frequency=K/Rank^A")
text(20, 40, paste("K=", constK))
text(20, 30, paste("A=", A))
}
, constK=slider(10,50, initial=freqMtx[1,1],step=2)
)
定数Aのスライダー 最小0, 最大1 ステップ:0.05 に設定
install.packages("shiny", dependencies = TRUE)
library(shiny)