# Pick the term column plus one frequency column from a frequency table.
#
# Fmtx:     table whose 1st column holds the terms; the 2nd and 3rd columns
#           are selected by position (presumably raw and relative
#           frequencies -- confirm against getFreqMtx2).
# relative: when TRUE keep columns 1 and 3, otherwise keep columns 1 and 2.
#
# Returns Fmtx restricted to the two chosen columns.
selectFreq1 <- function(Fmtx, relative = FALSE) {
  keep <- if (relative == TRUE) c(1, 3) else 1:2
  Fmtx[keep]
}
# Build a term-by-document frequency table `tf` from every file in `dirName`.
# getFreqMtx2() is expected to come from the sourced file (not shown here).
source("getFreqMtx2.R")
dirName <- "testData"
files <- list.files(dirName)
if (length(files) == 0) {
  stop("No input files found in directory: ", dirName, call. = FALSE)
}
filesDir <- file.path(dirName, files)
# Document label = file name up to its first dot.
fnames <- sub("\\..*$", "", files)
freqLst <- lapply(filesDir, getFreqMtx2)
rawfreqLst <- lapply(freqLst, selectFreq1)
# Outer-join all per-file tables on "term". Each table's frequency column is
# first given a unique placeholder name: merging frames that share the same
# value-column name makes merge() emit "column names ... are duplicated"
# warnings once more than two tables are combined.
tf <- Reduce(
  function(x, y) merge(x, y, all = TRUE, by = "term"),
  Map(
    function(tbl, k) {
      names(tbl)[2] <- paste0("doc", k)
      tbl
    },
    rawfreqLst, seq_along(rawfreqLst)
  )
)
# A term missing from a document has frequency 0.
tf[is.na(tf)] <- 0
tf <- tf[order(as.vector(tf$term)), ]
# Move the terms into the row names so that only frequency columns remain.
row.names(tf) <- tf[, 1]
tf <- tf[-1]
colnames(tf) <- fnames
tf
## test1 test2 test3 test4
## a 3 2 2 4
## b 4 4 0 4
## c 13 2 3 5
## d 0 0 1 1
## e 7 1 1 2
## f 0 11 9 20
## g 0 7 7 14
## h 0 0 4 4
複数のテキストに共通して出現する単語を低く評価する
### TF-IDF 1
\[ w = tf \cdot \log\left(\frac{N}{df}\right) \]
# Number of documents: tf has one column per document.
N <- ncol(tf)
# Document frequency: for each term (row), how many documents have a
# non-zero count.
df <- rowSums(tf > 0)
storage.mode(df) <- "integer"  # keep the count as an integer vector
# TF-IDF weight w = tf * log(N / df), rounded to two decimals.
w <- round(tf * log(N / df), digits = 2)
w
## test1 test2 test3 test4
## a 0.00 0.00 0.00 0.00
## b 1.15 1.15 0.00 1.15
## c 0.00 0.00 0.00 0.00
## d 0.00 0.00 0.69 0.69
## e 0.00 0.00 0.00 0.00
## f 0.00 3.16 2.59 5.75
## g 0.00 2.01 2.01 4.03
## h 0.00 0.00 2.77 2.77
# Show only the terms whose total weight is positive. drop = FALSE keeps the
# result a table even when w has a single column.
w[rowSums(w) > 0, , drop = FALSE]
## test1 test2 test3 test4
## b 1.15 1.15 0.00 1.15
## d 0.00 0.00 0.69 0.69
## f 0.00 3.16 2.59 5.75
## g 0.00 2.01 2.01 4.03
## h 0.00 0.00 2.77 2.77
## Warning in merge.data.frame(tf, i, all = T, by = "term"): column names
## 'raw.x', 'raw.y' are duplicated in the result
## Warning in merge.data.frame(tf, i, all = T, by = "term"): column names
## 'raw.x', 'raw.y' are duplicated in the result
## Warning in merge.data.frame(tf, i, all = T, by = "term"): column names
## 'raw.x', 'raw.y', 'raw.x', 'raw.y' are duplicated in the result
## Warning in merge.data.frame(tf, i, all = T, by = "term"): column names
## 'raw.x', 'raw.y', 'raw.x', 'raw.y' are duplicated in the result
## Warning in merge.data.frame(tf, i, all = T, by = "term"): column names
## 'raw.x', 'raw.y', 'raw.x', 'raw.y', 'raw.x', 'raw.y' are duplicated in the
## result
# Preview the first six rows of the frequency table.
head(tf, n = 6L)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## 000 0 0 0 0 0 0 0 2
## 1 0 0 0 1 0 0 0 1
## 10 0 0 1 0 0 0 0 1
## 100 0 0 0 0 1 0 0 0
## 11 2 0 0 0 0 0 0 0
## 12 1 0 0 0 0 0 0 0
# Reorder the rows by the second column, largest value first, then preview.
tf <- tf[order(tf[[2]], decreasing = TRUE), ]
head(tf, n = 6L)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## the 7 25 36 16 31 32 52 39
## in 4 20 26 4 11 15 4 32
## and 7 18 38 15 26 31 22 37
## of 5 18 35 10 26 30 38 34
## to 11 17 26 11 17 11 15 35
## that 0 13 8 2 7 7 10 7
# Reorder the rows by the "hiroshima" column, largest value first, then
# preview.
tf <- tf[order(tf[["hiroshima"]], decreasing = TRUE), ]
head(tf, n = 6L)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## to 11 17 26 11 17 11 15 35
## the 7 25 36 16 31 32 52 39
## and 7 18 38 15 26 31 22 37
## university 6 5 16 18 21 15 9 22
## of 5 18 35 10 26 30 38 34
## hiroshima 5 0 0 0 0 0 0 0
# Install shiny only when it is missing, so that re-running the script does
# not reinstall the package every time.
if (!requireNamespace("shiny", quietly = TRUE)) {
  install.packages("shiny", dependencies = TRUE)
}
library(shiny)
# Launch the sample apps from their app directories.
runApp("app_hoge")
runApp("app_hoge2")  # string added
# UI definition: a title, a sidebar holding the "msg" text box, and a main
# panel that displays the "showMessage" output.
shinyUI(
  bootstrapPage(
    # Application title
    headerPanel(title = "Test Hoge"),
    # Sidebar
    sidebarPanel(
      textInput(inputId = "msg", label = "Please input your message:")
    ),
    # Show a message
    mainPanel(
      textOutput(outputId = "showMessage")
    )
  )
)
# Server logic: the "showMessage" output renders the text of the "msg" input.
shinyServer(
  function(input, output) {
    output[["showMessage"]] <- renderText(input[["msg"]])
  }
)
# Bar chart of the "hiroshima" counts for the first 50 rows of tf (at most).
# head() is used instead of [1:50] so that a table with fewer than 50 rows
# does not pad the plot with NA bars and NA labels.
freq <- head(tf$hiroshima, 50)
label <- head(rownames(tf), 50)
# names.arg is spelled out in full rather than relying on partial matching
# of `names`; las = 3 draws the axis labels vertically.
barplot(freq, names.arg = label, las = 3, col = "blue")
# Launch the two frequency bar-chart apps, one after the other.
runApp(appDir = "app_freqBar")
runApp(appDir = "app_freqBar2")
alt text
# Run getFreqDir() on the "univ" directory with its default options and
# preview the result rounded to two decimals. getFreqDir() is expected to
# come from the sourced file (not shown here).
source("getFreqDir.R")
res <- getFreqDir("univ")
head(round(res, digits = 2), n = 6L)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## to 11 17 26 11 17 11 15 35
## and 7 18 38 15 26 31 22 37
## the 7 25 36 16 31 32 52 39
## university 6 5 16 18 21 15 9 22
## hiroshima 5 0 0 0 0 0 0 0
## of 5 18 35 10 26 30 38 34
# Same directory with relative = TRUE; preview rounded to two decimals.
res1 <- getFreqDir("univ", relative = TRUE)
head(round(res1, digits = 2), n = 6L)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## to 0.06 0.03 0.03 0.04 0.03 0.02 0.03 0.04
## and 0.04 0.03 0.05 0.05 0.05 0.06 0.04 0.04
## the 0.04 0.05 0.05 0.05 0.06 0.06 0.10 0.04
## university 0.03 0.01 0.02 0.06 0.04 0.03 0.02 0.02
## hiroshima 0.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## of 0.03 0.03 0.05 0.03 0.05 0.06 0.08 0.04
# Same directory with tfidf = TRUE; preview rounded to two decimals.
res2 <- getFreqDir("univ", tfidf = TRUE)
head(round(res2, digits = 2), n = 6L)
## hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## hiroshima 10.40 0.00 0.00 0 0 0 0 0.00
## 11 4.16 0.00 0.00 0 0 0 0 0.00
## d 4.16 0.00 0.00 0 0 0 0 0.00
## international 2.94 0.00 0.98 0 0 0 0 5.88
## peace 2.77 1.39 0.00 0 0 0 0 0.00
## 12 2.08 0.00 0.00 0 0 0 0 0.00