Lecture3: 関数の作成
Warmup Practice
作業ディレクトリの設定
# Make the project directory the working directory so that the relative
# paths used later (e.g. "sample_texts/sample_en.txt") resolve against it.
# NOTE(review): "/cloud/project" is presumably the hosted-IDE project root —
# confirm before running this anywhere else.
setwd("/cloud/project")
準備:単語リスト
# Read the sample text, one element per line.
txt <- readLines("sample_texts/sample_en.txt")
# Split each line on whitespace or punctuation, flatten the per-line pieces
# into a single character vector, and lower-case everything in one pass.
wordLst <- tolower(unlist(strsplit(txt, "[[:space:]]|[[:punct:]]")))
# Discard the empty strings that adjacent delimiters leave behind.
wordLst <- wordLst[nzchar(wordLst)]
前回の補足
# Alternative split pattern: a single space, or any one character from the
# bracket set (period, comma, "!", "?", ":"). The repeated dots inside the
# brackets add nothing, because a character class only records which
# characters it contains.
strsplit(txt," |[.,!?:...]")
単語のToken数
# Token count: total number of words in the list, duplicates included.
tokens <- length(wordLst)
単語のTypes数
- unique()関数は,ベクトルから重複を取り除いた要素(各要素を1回ずつ)を返す
# Type count: number of distinct words in the list.
types <- length(unique(wordLst))
結果出力
# Report both counts. paste() joins its arguments with a space, so together
# with the trailing space in each label the number is preceded by two spaces.
print(paste("The number of tokens: ", tokens))
print(paste("The number of types: ", types))
関数の作成
返り(戻り)値なし関数:printTTR関数を作成
引数: 単語リスト
結果出力: TTRの結果
# Print the type-token ratio (TTR) of a word vector as a percentage.
#
# Args:
#   wLst: vector of word tokens.
#
# Prints a line of the form "TTR = <value>" and returns that string
# invisibly, so a top-level call still shows the result exactly once.
printTTR <- function(wLst) {
  num_tokens <- length(wLst)
  num_types <- length(unique(wLst))
  res_TTR <- num_types / num_tokens * 100
  # Print explicitly: a bare paste() is only displayed by top-level
  # auto-printing, so nothing would appear when the call is assigned,
  # wrapped in another function, or run through source().
  print(paste("TTR =", res_TTR))
}
printTTR関数の実行
# Show the TTR of the word list.
printTTR(wordLst)
返り(戻り)値あり関数:calcTTR1関数を作成
引数: 単語リスト
戻り値: TTR計算値
# Compute the type-token ratio (TTR) of a word vector.
#
# Args:
#   wLst: vector of word tokens.
#
# Returns:
#   Number of distinct words divided by the total number of words,
#   multiplied by 100.
calcTTR1 <- function(wLst) {
  n_distinct <- length(unique(wLst))
  n_total <- length(wLst)
  n_distinct / n_total * 100
}
calcTTR1関数の実行
# Call calcTTR1 at top level; the returned value is auto-printed.
calcTTR1(wordLst)
戻り値の利用
# Keep the returned TTR in a variable, then round it to one decimal place.
res <- calcTTR1(wordLst)
round(res,1)
返り(戻り)値あり関数:calcTTR2関数を作成
引数: Token数 (arg_tokens), Type数 (arg_types)
戻り値: TTR計算値
# Compute the type-token ratio (TTR) from counts that are already known.
#
# Args:
#   arg_tokens: number of tokens.
#   arg_types: number of types.
#
# Returns:
#   arg_types divided by arg_tokens, multiplied by 100.
calcTTR2 <- function(arg_tokens, arg_types) {
  ratio <- arg_types / arg_tokens
  ratio * 100
}
calcTTR2関数の実行
# Reuse the token and type counts computed earlier.
calcTTR2(tokens,types)
ファイルからの読み込み
# calcTTR3 is not defined in this file; load it from the helper script
# first so the call below can find it.
source("utils.R")
calcTTR3(wordLst)
[1] 77.46479
練習(Lec04の課題): RTTR (Root Type-Token Ratio, Guiraud値) を計算する関数calcRTTRの作成
引数: 英文テキストファイル
戻り値: Guiraud計算値
calcRTTR関数の実行
# Run calcRTTR on the sample text file.
# NOTE(review): no definition of calcRTTR is visible above this line; it has
# to be defined (this is the exercise) before the call can succeed.
calcRTTR("sample_texts/sample_en.txt")
Word Frequencies
# Count how often each word occurs; the outer parentheses make the
# assignment print its value.
(freq <- table(wordLst))
Sort
# Order the counts from most to least frequent and print them.
(freq_data<-sort(freq, decreasing=TRUE))
頻度テーブルをデータフレーム型に変換
# Turn the sorted frequency table into a data frame, then display it.
freqData <- data.frame(freq_data)
freqData
相対頻度テーブル
# Relative frequency: each word's count divided by the total token count,
# sorted from largest to smallest and printed.
(relative<-sort(freq/tokens, decreasing=TRUE))
# Optional sanity check (disabled): the relative frequencies should sum to 1.
#sum(freq/tokens)
相対頻度テーブルをデータフレーム型に変換
# Turn the relative-frequency table into a data frame and print it.
(relativeData <- data.frame(relative))
2つのデータフレーム型変数を連結(merge)
# Join the raw and relative frequency data frames on the word column,
# keeping rows from both sides. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
freqMtx <- merge(freqData, relativeData, all = TRUE, by = "wordLst")
freqMtx
# merge() places the key column first, so column 1 holds the words.
freqMtx[, 1]
出現単語の情報を行ラベルにコピー
# Use the words in the first column as row labels.
rownames(freqMtx)<-as.character(freqMtx[,1])
出現単語の情報(1列目)を削除
# Drop the first column (the words are now the row labels) and give the two
# remaining columns descriptive names.
freqMtx<-freqMtx[-1]
colnames(freqMtx) <- c("raw", "relative")
LS0tCnRpdGxlOiAiREhfQjogTGVjdHVyZTAzIChGYWxsIDIwMjEpIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIExlY3R1cmUzOiDplqLmlbDjga7kvZzmiJAKCiMjIFdhcm11cCBQcmFjdGljZQojIyDkvZzmpa3jg4fjgqPjg6zjgq/jg4jjg6rjga7oqK3lrpoKYGBge3J9CnNldHdkKCIvY2xvdWQvcHJvamVjdCIpCmBgYAoKIyMg5L2c5qWt44OH44Kj44Os44Kv44OI44Oq44Gu56K66KqNIApgYGB7cn0KZ2V0d2QoKQpgYGAKCiMjIOa6luWCme+8muWNmOiqnuODquOCueODiApgYGB7cn0KdHh0PC1yZWFkTGluZXMoInNhbXBsZV90ZXh0cy9zYW1wbGVfZW4udHh0IikKd29yZExzdDwtc3Ryc3BsaXQodHh0LCJbWzpzcGFjZTpdXXxbWzpwdW5jdDpdXSIpCndvcmRMc3Q8LXVubGlzdCh3b3JkTHN0KQp3b3JkTHN0PC10b2xvd2VyKHdvcmRMc3QpCndvcmRMc3Q8LSB3b3JkTHN0W3dvcmRMc3QgIT0gIiJdCmBgYAoKIyMg5YmN5Zue44Gu6KOc6LazCmBgYHtyfQpzdHJzcGxpdCh0eHQsIiB8Wy4sIT86Li4uXSIpCmBgYAoKIyMjIOe1kOaenOmDqOWIhuWHuuWKmwpgYGB7cn0KaGVhZCh3b3JkTHN0KQpgYGAKCiMjIyDljZjoqp7jga5Ub2tlbuaVsApgYGB7cn0KdG9rZW5zIDwtIGxlbmd0aCh3b3JkTHN0KQpgYGAKCiMjIyDljZjoqp7jga5UeXBlc+aVsAoqIHVuaXF1ZSgp6Zai5pWw44Gv77yM44Oq44K544OI44Gu6YeN6KSH44GX44Gq44GE6KaB57Sg44KS6L+U44GZCmBgYHtyfQp0eXBlcyA8LSBsZW5ndGgodW5pcXVlKHdvcmRMc3QpKQpgYGAKCiMjIyDntZDmnpzlh7rlipsKYGBge3J9CnByaW50KHBhc3RlKCJUaGUgbnVtYmVyIG9mIHRva2VuczogIiwgdG9rZW5zKSkKcHJpbnQocGFzdGUoIlRoZSBudW1iZXIgb2YgdHlwZXM6ICIsIHR5cGVzKSkKYGBgCgojIOmWouaVsOOBruS9nOaIkAojIyDov5TjgorvvIjmiLvjgorvvInlgKTjgarjgZfplqLmlbDvvJpwcmludFRUUumWouaVsOOCkuS9nOaIkAojIyMg5byV5pWwOiDljZjoqp7jg6rjgrnjg4gKIyMjIOe1kOaenOWHuuWKmzogVFRS44Gu57WQ5p6cCmBgYHtyfQpwcmludFRUUjwtIGZ1bmN0aW9uKHdMc3QpIHsKICAgIG51bV90b2tlbnMgPC0gbGVuZ3RoKHdMc3QpCiAgICBudW1fdHlwZXMgPC0gbGVuZ3RoKHVuaXF1ZSh3THN0KSkKICAgIHJlc19UVFIgPC0gbnVtX3R5cGVzL251bV90b2tlbnMgKiAxMDAKICAgIHBhc3RlKCJUVFIgPSIsIHJlc19UVFIpCn0KYGBgCgojIyBwcmludFRUUumWouaVsOOBruWun+ihjApgYGB7cn0KcHJpbnRUVFIod29yZExzdCkKYGBgCgojIyDov5TjgorvvIjmiLvjgorvvInlgKTjgYLjgorplqLmlbDvvJpjYWxjVFRSMemWouaVsOOCkuS9nOaIkAojIyMg5byV5pWwOiDljZjoqp7jg6rjgrnjg4gKIyMjIOaIu+OCiuWApDogVFRS6KiI566X5YCkCmBgYHtyfQpjYWxjVFRSMTwtIGZ1bmN0aW9uKHdMc3QpIHsKICAgIG51bV90b2tlbnMgPC0gbGVuZ3RoKHdMc3QpCiAgICBudW1fdHlwZXMgPC0gbGVuZ3RoKHVuaXF1ZSh3THN0KSkKICAgIHJlc19UVFIg
PC0gbnVtX3R5cGVzL251bV90b2tlbnMgKiAxMDAKICAgIHJldHVybihyZXNfVFRSKQp9CmBgYAoKIyMgY2FsY1RUUjHplqLmlbDjga7lrp/ooYwKYGBge3J9CmNhbGNUVFIxKHdvcmRMc3QpCmBgYAojIyAg5oi744KK5YCk44Gu5Yip55SoCmBgYHtyfQpyZXMgPC0gY2FsY1RUUjEod29yZExzdCkKcm91bmQocmVzLDEpCmBgYAoKIyMg6L+U44KK77yI5oi744KK77yJ5YCk44GC44KK6Zai5pWw77yaY2FsY1RUUjLplqLmlbDjgpLkvZzmiJAKIyMjIOW8leaVsDog5Y2Y6Kqe44Oq44K544OICiMjIyDmiLvjgorlgKQ6IFRUUuioiOeul+WApApgYGB7cn0KY2FsY1RUUjI8LSBmdW5jdGlvbihhcmdfdG9rZW5zLGFyZ190eXBlcykgewogICAgcmVzX1RUUiA8LSBhcmdfdHlwZXMvYXJnX3Rva2VucyAqIDEwMAogICAgcmV0dXJuKHJlc19UVFIpCn0KYGBgCgojIyBjYWxjVFRSMumWouaVsOOBruWun+ihjApgYGB7cn0KY2FsY1RUUjIodG9rZW5zLHR5cGVzKQpgYGAKCgojIyDjg5XjgqHjgqTjg6vjgYvjgonjga7oqq3jgb/ovrzjgb8KYGBge3J9CnNvdXJjZSgidXRpbHMuUiIpCmNhbGNUVFIzKHdvcmRMc3QpCmBgYAoKIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlO3RleHQtZGVjb3JhdGlvbjogbGluZS10aHJvdWdoOyI+57e057+SPC9zcGFuPjxzcGFuIHN0eWxlPSJjb2xvcjogcmVkOyAiPkxlYzA044Gu6Kqy6aGMPC9zcGFuPjogUlRUUihSb290IFR5cGUtVG9rZW4gUmF0aW8pIEc8c3BhbiBzdHlsZT0iY29sb3I6IHJlZDsgIj51PC9zcGFuPmlyYXVk44Gu5YCk44KS6KiI566X44GZ44KLPHNwYW4gc3R5bGU9ImNvbG9yOiByZWQ7ICI+6Zai5pWwY2FsY1JUVFI8L3NwYW4+44Gu5L2c5oiQ44CACiMjIyDlvJXmlbA6ICDoi7Hmlofjg4bjgq3jgrnjg4jjg5XjgqHjgqTjg6sKIyMjIOaIu+OCiuWApDogRzxzcGFuIHN0eWxlPSJjb2xvcjogcmVkOyAiPnU8L3NwYW4+aXJhdWToqIjnrpflgKQKYGBge3IsIGVjaG89RkFMU0V9CmNhbGNSVFRSPC0gZnVuY3Rpb24oZm5hbWUpIHsKICAgIHR4dDwtcmVhZExpbmVzKGZuYW1lKQogICAgd29yZExzdDwtc3Ryc3BsaXQodHh0LCJbWzpzcGFjZTpdXXxbWzpwdW5jdDpdXSIpCiAgICB3b3JkTHN0PC11bmxpc3Qod29yZExzdCkKICAgIHdvcmRMc3Q8LXRvbG93ZXIod29yZExzdCkKICAgIHdvcmRMc3Q8LSB3b3JkTHN0W3dvcmRMc3QgIT0gIiJdCiAgICBudW1fdG9rZW5zIDwtIGxlbmd0aCh3b3JkTHN0KQogICAgbnVtX3R5cGVzIDwtIGxlbmd0aCh1bmlxdWUod29yZExzdCkpCiAgICByZXNfRyA8LSBudW1fdHlwZXMvc3FydChudW1fdG9rZW5zKQogICAgcmV0dXJuKHJlc19HKQp9CmBgYAoKIyMgY2FsY1JUVFLplqLmlbDjga7lrp/ooYwKYGBge3J9CmNhbGNSVFRSKCJzYW1wbGVfdGV4dHMvc2FtcGxlX2VuLnR4dCIpCmBgYAoKIyMgV29yZCBGcmVxdWVuY2llcwpgYGB7cn0KKGZyZXEgPC0gdGFibGUod29yZExzdCkpCmBgYAoKIyMgU29ydApgYGB7cn0KKGZyZXFfZGF0YTwtc29ydChmcmVxLCBkZWNyZWFz
aW5nPVRSVUUpKQpgYGAKCiMjIOmgu+W6puODhuODvOODluODq+OCkuODh+ODvOOCv+Wei+OBq+WkieaPmwpgYGB7cn0KZnJlcURhdGEgPC0gZGF0YS5mcmFtZShmcmVxX2RhdGEpCmZyZXFEYXRhCmBgYAoKIyMg55u45a++6aC75bqm44OG44O844OW44OrCmBgYHtyfQoocmVsYXRpdmU8LXNvcnQoZnJlcS90b2tlbnMsIGRlY3JlYXNpbmc9VFJVRSkpCiNzdW0oZnJlcS90b2tlbnMpCmBgYAojIyDnm7jlr77poLvluqbjg4bjg7zjg5bjg6vjgpLjg4fjg7zjgr/lnovjgavlpInmj5sKYGBge3J9CihyZWxhdGl2ZURhdGEgPC0gZGF0YS5mcmFtZShyZWxhdGl2ZSkpCmBgYAojIyDvvJLjgaTjga7jg4fjg7zjgr/lnovlpInmlbDjgpLpgKPntZAobWVyZ2UpCmBgYHtyfQpmcmVxTXR4IDwtIG1lcmdlKGZyZXFEYXRhLCByZWxhdGl2ZURhdGEsIGFsbD1ULCBieT0id29yZExzdCIpCmBgYAoKYGBge3J9CmZyZXFNdHgKYGBgCgpgYGB7cn0KZnJlcU10eFssMV0KYGBgCgojIyMg5Ye654++5Y2Y6Kqe44Gu5oOF5aCx44KS6KGM44Op44OZ44Or44Gr44Kz44OU44O8CmBgYHtyfQpyb3duYW1lcyhmcmVxTXR4KTwtYXMuY2hhcmFjdGVyKGZyZXFNdHhbLDFdKQpgYGAKCiMjIOWHuuePvuWNmOiqnuOBruaDheWgsSgx5YiX55uuKeOCkuWJiumZpApgYGB7cn0KZnJlcU10eDwtZnJlcU10eFstMV0KY29sbmFtZXMoZnJlcU10eCkgPC0gYygicmF3IiwgInJlbGF0aXZlIikKYGBgCgojIyDnspfpoLvluqYt55u45a++6aC75bqm44Gu6KGM5YiXCmBgYHtyfQpmcmVxTXR4CmBgYAoKCgoK