rep(1:10)
## [1] 1 2 3 4 5 6 7 8 9 10
seq(1,10)
## [1] 1 2 3 4 5 6 7 8 9 10
seq(1,10,by=2)
## [1] 1 3 5 7 9
readTxt <- function(fname){
cTxt<-readLines(fname)
cTxt<-strsplit(cTxt,"[[:space:]]|[[:punct:]]")
cTxt<-unlist(cTxt)
cTxt<-tolower(cTxt)
cTxt<- cTxt[cTxt != ""]
return (cTxt)
}
dirName="univ"
(files <- list.files(dirName))
## [1] "kyoto1.txt" "kyoto2.txt" "osaka1.txt" "osaka2.txt" "osaka3.txt"
## [6] "osaka4.txt" "tokyo1.txt" "tokyo2.txt" "waseda1.txt" "waseda2.txt"
filesDir <- unlist(lapply(dirName, paste, files, sep = "/"))
print(files[6])
## [1] "osaka4.txt"
filename <- filesDir[which(files==files[6])]
print(filename)
## [1] "univ/osaka4.txt"
txt <- readTxt(filename)
head(txt)
## [1] "greetings" "everyone" "today" "i" "was" "appointed"
node <- "education"
grep(node,txt, ignore.case = FALSE, value=T)
## [1] "education" "educational" "education"
grep("^education$",txt, value=T)
## [1] "education" "education"
which(txt == node)
## [1] 17 695
txt[which(txt == node)]
## [1] "education" "education"
library(shiny)
runApp("app_freqBar_ext")
source("getFreqDir.R")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
univMtx<- getFreqDir("univ")
univNames <- colnames(univMtx)
tmpUniv <- univMtx$osaka4
length(tmpUniv)
## [1] 1506
tmpUniv <-tmpUniv[tmpUniv>0]
orderLst <- order(tmpUniv, decreasing = TRUE)
tmpFreq <- tmpUniv[orderLst]
tmpWord <- rownames(univMtx)[orderLst][0:length(tmpUniv)]
head(tmpFreq)
## [1] 43 35 31 23 22 20
head(tmpWord)
## [1] "the" "and" "of" "to" "university"
## [6] "human"
(tmpName <- univNames[6])
## [1] "osaka4"
tmpUniv <-univMtx[which(colnames(univMtx)==tmpName)]
tmpUniv <-subset(tmpUniv, tmpUniv>0)
#tmpUniv <-subset(tmpUniv, tmpUniv[1]>0)
#head(tmpUniv)
#dim(tmpUniv)
orderLst <- order(tmpUniv[,1], decreasing = TRUE)
tmpFreq <- tmpUniv[orderLst,]
tmpWord <- rownames(tmpUniv)[orderLst]
head(tmpFreq)
## [1] 43 35 31 23 22 20
head(tmpWord)
## [1] "the" "and" "of" "to" "university"
## [6] "society"
install.packages("proxy", dependencies = TRUE)
library(proxy)
##
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
source("getFreqDir.R")
univTable <- getFreqDir("univ")
\[Corr(x,y)= \frac{\sum (x_{i}-\overline{x}) (y_{i}-\overline{y})}{\sqrt{\sum (x_{i}-\overline{x})^2\sum (y_{i}-\overline{y})^2}} \]
tf1 <- getFreqDir("testData")
res <-cor(tf1)
round(res,2)
## test1 test2 test3 test4
## test1 1.00 -0.29 -0.38 -0.35
## test2 -0.29 1.00 0.80 0.96
## test3 -0.38 0.80 1.00 0.94
## test4 -0.35 0.96 0.94 1.00
corr <- simil(t(tf1))
round(corr, 2)
## test1 test2 test3
## test2 -0.29
## test3 -0.38 0.80
## test4 -0.35 0.96 0.94
corr <- simil(t(tf1), diag=T)
round(corr, 2)
## test1 test2 test3 test4
## test1 NA
## test2 -0.29 NA
## test3 -0.38 0.80 NA
## test4 -0.35 0.96 0.94 NA
\[Cos(x,y)= \frac{\sum x_{i} y_{i}}{\sqrt{\sum x_{i}^2\sum y_{i}^2}} \]
simil(t(tf1), method="cosine")
## test1 test2 test3
## test2 0.2526633
## test3 0.2628980 0.8973604
## test4 0.2643935 0.9764635 0.9714201
tf2 <- getFreqDir("univ")
res <-simil(t(tf2), method="cosine")
round(res,2)
## kyoto1 kyoto2 osaka1 osaka2 osaka3 osaka4 tokyo1 tokyo2 waseda1
## kyoto2 0.86
## osaka1 0.77 0.77
## osaka2 0.83 0.84 0.84
## osaka3 0.88 0.85 0.81 0.90
## osaka4 0.85 0.82 0.80 0.85 0.88
## tokyo1 0.81 0.83 0.72 0.80 0.84 0.82
## tokyo2 0.83 0.79 0.74 0.79 0.80 0.83 0.84
## waseda1 0.87 0.83 0.77 0.81 0.82 0.82 0.77 0.84
## waseda2 0.82 0.76 0.70 0.73 0.76 0.77 0.73 0.80 0.87
res2<-simil(t(tf2), method="cosine", diag=T)
res2<-as.matrix(res2)
res2[is.na(res2)] <- 1
round(res2,2)
## kyoto1 kyoto2 osaka1 osaka2 osaka3 osaka4 tokyo1 tokyo2 waseda1 waseda2
## kyoto1 1.00 0.86 0.77 0.83 0.88 0.85 0.81 0.83 0.87 0.82
## kyoto2 0.86 1.00 0.77 0.84 0.85 0.82 0.83 0.79 0.83 0.76
## osaka1 0.77 0.77 1.00 0.84 0.81 0.80 0.72 0.74 0.77 0.70
## osaka2 0.83 0.84 0.84 1.00 0.90 0.85 0.80 0.79 0.81 0.73
## osaka3 0.88 0.85 0.81 0.90 1.00 0.88 0.84 0.80 0.82 0.76
## osaka4 0.85 0.82 0.80 0.85 0.88 1.00 0.82 0.83 0.82 0.77
## tokyo1 0.81 0.83 0.72 0.80 0.84 0.82 1.00 0.84 0.77 0.73
## tokyo2 0.83 0.79 0.74 0.79 0.80 0.83 0.84 1.00 0.84 0.80
## waseda1 0.87 0.83 0.77 0.81 0.82 0.82 0.77 0.84 1.00 0.87
## waseda2 0.82 0.76 0.70 0.73 0.76 0.77 0.73 0.80 0.87 1.00