library(RMeCab)
library(knitr)
# Download the pn_ja.dic dictionary (colon-separated, Shift_JIS encoded) and
# load it as a data frame with columns term, kana, pos and value.
# local() keeps the helper variables out of the global environment.
pndic <- local({
  dic_url <- "http://www.lr.pi.titech.ac.jp/~takamura/pubs/pn_ja.dic"
  # Column name -> column class, in file order.
  dic_fields <- c(
    term = "character",
    kana = "character",
    pos = "factor",
    value = "numeric"
  )
  read.table(
    dic_url,
    sep = ":",
    col.names = names(dic_fields),
    colClasses = unname(dic_fields),
    fileEncoding = "Shift_JIS"
  )
})
str(pndic)
## 'data.frame': 55125 obs. of 4 variables:
## $ term : chr "優れる" "良い" "喜ぶ" "褒める" ...
## $ kana : chr "すぐれる" "よい" "よろこぶ" "ほめる" ...
## $ pos : Factor w/ 5 levels "形容詞","助動詞",..: 3 1 3 3 1 1 1 3 5 3 ...
## $ value: num 1 1 1 1 1 ...
# Collapse the dictionary to one row per (term, pos) pair by averaging the
# values of entries that share both; the kana column is not carried over.
pndic2 <- aggregate(
  value ~ term + pos,
  data = pndic,
  FUN = mean
)
str(pndic2)
## 'data.frame': 52689 obs. of 3 variables:
## $ term : chr "あくどい" "あざとい" "あたじけない" "あっけ" ...
## $ pos : Factor w/ 5 levels "形容詞","助動詞",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ value: num -0.991 -0.983 -0.993 -0.976 0.993 ...
# Sample sentences to score against the aggregated dictionary.
tw1 <- "すもももももももものうち"
tw2 <- "今日もしんどい"
tw <- c(tw1, tw2)

# Score one sentence: tokenize it with RMeCabC(), look every token up in
# `dic` by (term, pos), and return the sum of the matched values.
#
# text: a single character string to analyse.
# dic:  data frame with columns term, pos and value (defaults to pndic2).
# Returns a single numeric; 0 when no token has a dictionary entry.
#
# NOTE(review): tokens are matched exactly as RMeCabC() returns them here.
# If the dictionary stores base forms, inflected words will never match;
# RMeCabC(text, 1) presumably returns base forms -- confirm against the
# RMeCab documentation before switching, since it would change the scores.
score_text <- function(text, dic = pndic2) {
  # Tokenize once; the element names of the result carry the POS tags.
  tokens <- unlist(RMeCabC(text))
  if (length(tokens) == 0L) {
    return(0)
  }
  morphs <- data.frame(term = unname(tokens), pos = names(tokens))
  # Inner join: tokens without a dictionary entry are dropped, and repeated
  # tokens contribute once per occurrence.
  hits <- merge(morphs, dic, by = c("term", "pos"))
  sum(hits$value)
}

# One score per sentence, for however many sentences `tw` holds.
tw.pn <- vapply(tw, score_text, numeric(1), USE.NAMES = FALSE)
kable(data.frame(text = tw, score = tw.pn))
## |text                     |      score|
## |:------------------------|----------:|
## |すもももももももものうち |  0.0000000|
## |今日もしんどい           | -0.7757595|