# Load getFreqMtxDir.R, which is expected to provide getFreqMtxDir().
source("getFreqMtxDir.R")
# Specify the folder that contains the text files.
# (This note was previously a bare, un-commented line, which R would try to
# evaluate as code and fail on.)
res <- getFreqMtxDir("msgs2", encoding = "sjis")
# Show the dimensions of the result.
dim(res)
## [1] 1419 10
# Show rows 1222 through 1225 of res (all columns).
res[seq(from = 1222, to = 1225), ]
## ColumbiaU HokkaidoU KagoshimaU KUFS NoethwesternU osaka Rikkyo
## students 1 8 4 6 4 0 1
## studied 0 0 0 0 0 0 0
## studies 0 1 0 2 0 1 1
## study 1 2 0 2 0 0 1
## SeinanGakuinU TUM Yale
## students 1 1 1
## studied 0 1 0
## studies 1 0 1
## study 1 2 0
# Save res as a CSV file in the working directory.
write.csv(x = res, file = "msg2.csv")
# Select the row(s) whose label is exactly "000".
res[row.names(res) == "000", ]
## ColumbiaU HokkaidoU KagoshimaU KUFS NoethwesternU osaka Rikkyo
## 000 0 0 2 0 0 0 0
## SeinanGakuinU TUM Yale
## 000 0 1 0
# Select the row(s) whose label is exactly "the".
res[row.names(res) == "the", ]
## ColumbiaU HokkaidoU KagoshimaU KUFS NoethwesternU osaka Rikkyo
## the 19 28 32 25 30 41 57
## SeinanGakuinU TUM Yale
## the 32 31 8
# Select rows labelled "000" or "10" by OR-ing two element-wise equality tests
# (== binds more tightly than |, so no extra parentheses are required).
res[rownames(res) == "000" | rownames(res) == "10", ]
## ColumbiaU HokkaidoU KagoshimaU KUFS NoethwesternU osaka Rikkyo
## 000 0 0 2 0 0 0 0
## SeinanGakuinU TUM Yale
## 000 0 1 0
# Select rows whose label belongs to the set {"000", "10"} using %in%.
res[row.names(res) %in% c("000", "10"), ]
## ColumbiaU HokkaidoU KagoshimaU KUFS NoethwesternU osaka Rikkyo
## 000 0 0 2 0 0 0 0
## SeinanGakuinU TUM Yale
## 000 0 1 0
# Words to treat as noise (English articles).
noise <- c("a", "an", "the")
# Show only the rows whose label is one of the noise words.
res[row.names(res) %in% noise, ]
## ColumbiaU HokkaidoU KagoshimaU KUFS NoethwesternU osaka Rikkyo
## a 9 3 10 12 9 15 14
## an 0 4 2 1 1 1 3
## the 19 28 32 25 30 41 57
## SeinanGakuinU TUM Yale
## a 6 13 6
## an 4 2 1
## the 32 31 8
# Show every row whose label is NOT one of the noise words.
res[!(row.names(res) %in% noise), ]
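# Regular-expression notation used below:
#   start of the string:                      ^
#   end of the string:                        $
#   any single character:                     .
#   one or more of the preceding character:   +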
# Collect the rows whose label begins with a digit.
tmp <- res[grep("^[[:digit:]]", rownames(res)), ]
# List the matching labels.
rownames(tmp)
# Rows of res whose label is among the matches ...
res[row.names(res) %in% row.names(tmp), ]
# ... and the rows whose label is not.
res[!(row.names(res) %in% row.names(tmp)), ]
# Collect the rows whose label begins with an alphabetic character.
tmp <- res[grep("^[[:alpha:]]", rownames(res)), ]
# List the matching labels.
rownames(tmp)
# Rows of res whose label is among the matches.
res[row.names(res) %in% row.names(tmp), ]
# Collect the rows whose label ends in "ly".
tmp <- res[grep("ly$", rownames(res)), ]
# List the matching labels.
rownames(tmp)
## [1] "accordingly" "actively" "annually"
## [4] "approximately" "boldly" "clearly"
## [7] "closely" "collaboratively" "complexly"
## [10] "comprehensively" "consistently" "continually"
## [13] "continuously" "crucially" "currently"
## [16] "daily" "deeply" "directly"
## [19] "eagerly" "early" "economically"
## [22] "effectively" "equally" "eventually"
## [25] "fairly" "family" "finally"
## [28] "genuinely" "globally" "highly"
## [31] "ideally" "importantly" "internationally"
## [34] "italy" "largely" "lastly"
## [37] "locally" "logically" "nearly"
## [40] "newly" "only" "particularly"
## [43] "personally" "politically" "previously"
## [46] "properly" "rapidly" "regularly"
## [49] "significantly" "simply" "simultaneously"
## [52] "socially" "spiritually" "strongly"
## [55] "successfully" "technically" "unconsciously"
# Show the rows of res whose label is among the labels collected in tmp.
res[row.names(res) %in% row.names(tmp), ]
## ColumbiaU HokkaidoU KagoshimaU KUFS NoethwesternU osaka
## accordingly 0 1 0 0 0 0
## actively 0 0 1 0 0 0
## annually 0 0 0 0 1 0
## approximately 0 0 1 0 0 0
## boldly 0 0 0 0 0 0
## clearly 0 0 0 1 0 0
## closely 0 0 0 0 0 0
## collaboratively 0 0 0 1 0 0
## complexly 0 0 0 0 0 0
## comprehensively 0 0 0 0 0 0
## consistently 0 0 0 0 0 0
## continually 0 0 0 0 0 0
## continuously 0 0 0 0 0 0
## crucially 0 0 0 1 0 0
## currently 0 1 0 0 0 0
## daily 0 1 0 2 0 0
## deeply 0 0 0 0 0 0
## directly 0 0 0 0 0 0
## eagerly 0 0 0 0 0 0
## early 0 0 0 0 0 1
## economically 0 0 0 0 0 0
## effectively 0 0 0 1 0 0
## equally 0 0 0 2 0 0
## eventually 0 0 0 0 0 2
## fairly 0 0 0 1 0 0
## family 0 1 0 0 0 0
## finally 0 0 0 0 1 0
## genuinely 0 0 0 1 0 0
## globally 0 1 0 0 0 1
## highly 0 0 1 0 0 0
## ideally 1 0 0 0 0 0
## importantly 1 0 0 0 0 0
## internationally 0 0 1 0 0 0
## italy 0 0 0 0 0 0
## largely 0 0 0 1 0 0
## lastly 0 0 0 1 0 0
## locally 0 0 1 0 0 1
## logically 0 0 0 0 0 0
## nearly 0 0 0 0 0 0
## newly 0 0 1 0 0 0
## only 0 0 0 0 0 1
## particularly 0 0 0 0 1 0
## personally 0 0 0 0 1 0
## politically 0 0 0 0 0 0
## previously 0 0 0 1 0 0
## properly 0 0 0 0 0 0
## rapidly 0 0 0 0 0 0
## regularly 0 0 0 0 0 1
## significantly 0 0 1 0 0 0
## simply 0 1 0 0 0 0
## simultaneously 0 0 0 0 1 0
## socially 0 0 0 0 0 0
## spiritually 0 0 0 0 0 1
## strongly 0 0 0 0 1 0
## successfully 0 0 0 0 1 0
## technically 0 0 0 0 0 0
## unconsciously 0 1 0 0 0 0
## Rikkyo SeinanGakuinU TUM Yale
## accordingly 0 0 0 0
## actively 0 0 0 0
## annually 0 0 0 0
## approximately 0 0 0 0
## boldly 0 0 1 0
## clearly 0 0 0 0
## closely 1 0 0 0
## collaboratively 0 0 0 0
## complexly 1 0 0 0
## comprehensively 0 0 1 0
## consistently 1 0 0 0
## continually 0 0 1 0
## continuously 1 0 0 0
## crucially 0 0 0 0
## currently 1 0 0 0
## daily 0 0 0 0
## deeply 1 0 0 1
## directly 1 0 0 0
## eagerly 0 0 0 1
## early 0 0 1 0
## economically 0 1 0 0
## effectively 0 0 0 0
## equally 0 0 0 0
## eventually 0 0 0 0
## fairly 0 0 0 0
## family 0 0 1 0
## finally 0 0 0 0
## genuinely 0 0 0 0
## globally 0 1 0 0
## highly 0 1 0 0
## ideally 0 0 0 0
## importantly 0 0 0 0
## internationally 0 1 0 0
## italy 0 1 0 0
## largely 0 0 0 0
## lastly 0 0 0 0
## locally 0 2 0 0
## logically 1 0 0 0
## nearly 0 0 1 0
## newly 0 0 0 0
## only 0 0 1 0
## particularly 0 0 0 0
## personally 0 0 0 0
## politically 1 1 0 0
## previously 0 0 0 0
## properly 1 0 0 0
## rapidly 0 2 0 0
## regularly 0 0 0 0
## significantly 1 0 0 0
## simply 0 0 0 0
## simultaneously 0 0 0 0
## socially 0 1 0 0
## spiritually 0 0 0 0
## strongly 0 0 0 0
## successfully 1 0 0 0
## technically 0 0 1 0
## unconsciously 0 0 0 0
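# NOTE(review): the commands that produced the output below are missing from
# this transcript. The labels shown would match a pattern such as "^d.+ed$"
# (starts with "d", ends with "ed") -- confirm against the original session.
## [1] "decoded" "demanded" "destroyed" "developed"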
## [5] "discovered" "distinguished"
## ColumbiaU HokkaidoU KagoshimaU KUFS NoethwesternU osaka
## decoded 0 0 0 0 0 0
## demanded 0 0 0 0 1 0
## destroyed 0 0 0 0 0 0
## developed 0 0 2 0 0 0
## discovered 1 0 0 0 0 0
## distinguished 0 0 0 1 0 0
## Rikkyo SeinanGakuinU TUM Yale
## decoded 1 0 0 0
## demanded 0 0 0 0
## destroyed 0 0 1 0
## developed 1 0 1 0
## discovered 0 0 0 0
## distinguished 0 0 1 0