作業ディレクトリの設定
setwd("/cloud/project")
絵文字・装飾文字の処理
サンプル文
text_en <- "Wishing ✨ you cozy autumn days 🎃 filled with colorful leaves 🍁 "
text_ja <- "色鮮やかな葉 🍁 とともに、ほっこりとした秋の日々を ✨ お過ごしください🎃 !"
text_fa <- "آرزوی ✨ روزهای پاییزی دنج 🎃 برای شما، پر از برگهای رنگارنگ 🍁!"
絵文字・装飾文字の検索
# Regular-expression character class that matches a single code point from
# any of these Unicode ranges:
#   U+1F600-U+1F64F  emoticons
#   U+1F300-U+1F5FF  miscellaneous symbols and pictographs
#   U+1F680-U+1F6FF  transport and map symbols
#   U+1F1E0-U+1F1FF  regional indicator symbols (flags)
#   U+2600-U+26FF    miscellaneous symbols
#   U+2700-U+27BF    dingbats
uni_emoji_pattern <- "[\U{1F600}-\U{1F64F}\U{1F300}-\U{1F5FF}\U{1F680}-\U{1F6FF}\U{1F1E0}-\U{1F1FF}\U{2600}-\U{26FF}\U{2700}-\U{27BF}]"
# Locate every match of the pattern in text_en; gregexpr() returns a list
# holding the match start positions (with their lengths as an attribute).
emoji_positions <- gregexpr(uni_emoji_pattern, text_en)
# Pull out the matched substrings (the emoji themselves) using those positions.
regmatches(text_en, emoji_positions)
[[1]]
[1] "✨" "🎃" "🍁"
絵文字・装飾文字の削除
gsub(uni_emoji_pattern, "", text_en)
[1] "Wishing you cozy autumn days filled with colorful leaves "
「絵文字・装飾文字削除」関数作成
#' Remove emoji and decorative symbols from text
#'
#' Deletes every character that falls inside one of the emoji-related Unicode
#' ranges listed in the body. All other characters are left untouched,
#' including the whitespace that surrounded a removed symbol.
#'
#' @param txt A character vector; each element is processed independently.
#' @return A character vector of the same length as `txt` with the matched
#'   characters removed.
remove_emojis <- function(txt) {
  # The supplemental ranges (U+1F900-U+1F9FF, U+1FA70-U+1FAFF) cover newer
  # emoji, and the variation selector U+FE0F is stripped so that no invisible
  # selector is left behind once its base symbol has been deleted.
  emoji_pattern <- paste0(
    "[",
    "\U{1F600}-\U{1F64F}", # emoticons
    "\U{1F300}-\U{1F5FF}", # miscellaneous symbols and pictographs
    "\U{1F680}-\U{1F6FF}", # transport and map symbols
    "\U{1F1E0}-\U{1F1FF}", # regional indicator symbols (flags)
    "\U{2600}-\U{26FF}",   # miscellaneous symbols
    "\U{2700}-\U{27BF}",   # dingbats
    "\U{1F900}-\U{1F9FF}", # supplemental symbols and pictographs
    "\U{1FA70}-\U{1FAFF}", # symbols and pictographs extended-A
    "\U{FE0F}",            # variation selector-16 (emoji presentation)
    "]"
  )
  gsub(emoji_pattern, "", txt)
}
Run the function with text_en
remove_emojis(text_en)
[1] "Wishing you cozy autumn days filled with colorful leaves "
Run the function with text_ja
remove_emojis(text_ja)
[1] "色鮮やかな葉 とともに、ほっこりとした秋の日々を お過ごしください !"
Run the function with text_fa
remove_emojis(text_fa)
[1] "آرزوی روزهای پاییزی دنج برای شما، پر از برگهای رنگارنگ !"
install.packages("cld3")
install.packages("jsonlite")
library(cld3)
library(jsonlite)
ライブラリの読み込み
library(udpipe)
Annotation with UDPipe
# Annotate the emoji-free English sentence with UDPipe. The model is chosen by
# the lower-cased language name. `language_name` is the variable assigned from
# languages.json in this notebook's source; the previously used `lang_name`
# is never assigned there, so the call failed in a fresh session.
parsed_sentence <- udpipe(remove_emojis(text_en), tolower(language_name))
# Show the first rows of the annotation result.
head(parsed_sentence)
View関数
View(parsed_sentence)
列名を抽出
colnames(parsed_sentence)
[1] "doc_id" "paragraph_id" "sentence_id" "sentence" "start"
[6] "end" "term_id" "token_id" "token" "lemma"
[11] "upos" "xpos" "feats" "head_token_id" "dep_rel"
[16] "deps" "misc"
head
head(parsed_sentence[c("token_id", "token", "head_token_id")])
Word Frequencies
# Count how many times each lemma occurs in the annotated sentence.
freqByUDPipe_lemma<-table(parsed_sentence$lemma)
# Print the resulting frequency table.
freqByUDPipe_lemma
autumn colorful cozy day fill leave wish with you
1 1 1 1 1 1 1 1 1
Put the first column’s values into the rownames
colnames(freqData)
[1] "Var1" "Freq"
rownames(freqData) <- freqData$Var1
head(freqData)
Delete a specific column
freqData <- freqData[-1]
head(freqData)
係り受け解析の視覚化
関数ファイルの読み込み
source("func_plot_annotation.R")
視覚化
plot_annotation(parsed_sentence, size = 4)


課題1(締め切り11月5日)
入力文の係り受け解析結果を描画出力する関数を作成してください
- 条件1: 入力文に絵文字・装飾文字が含まれている場合は、除去する
- 条件2: 関数の引数は文字列
- NOTE:
関数の名前は自由に付けてください。課題ができたら、メールで連絡してください。(連絡後、posit上でコードと、実行結果を確認します)
関数の実行例1
source("dep_relation_viz.R")
dep_relation_viz(text_en)


関数の実行例2
text_fr <- "Je vous souhaite ✨ des journées d'automne douillettes 🎃 remplies de feuilles colorées 🍁!"
dep_relation_viz(text_fr)


LS0tCnRpdGxlOiAiTGVjMDQ6IOmgu+W6puihqOS9nOaIkO+8iERhdGEgRnJhbWXlnovvvIkiCm91dHB1dDogaHRtbF9ub3RlYm9vawplZGl0b3Jfb3B0aW9uczogCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGlubGluZQotLS0KIyMg5L2c5qWt44OH44Kj44Os44Kv44OI44Oq44Gu6Kit5a6aCmBgYHtyfQpzZXR3ZCgiL2Nsb3VkL3Byb2plY3QiKQpgYGAKCiMg57W15paH5a2X44O76KOF6aO+5paH5a2X44Gu5Yem55CGCiMjIyDlj4LogIPos4fmlpkKLSA8YSBocmVmPSJodHRwczovL3d3dy51bmljb2RlLm9yZy9yZXBvcnRzL3RyNTEvdHI1MS0yNy5odG1sIiB0YXJnZXQ9Il9ibGFuayI+VW5pY29kZSBFbW9qaTwvYT4KCgojIyMg44K144Oz44OX44Or5paHCmBgYHtyfQp0ZXh0X2VuIDwtICJXaXNoaW5nIOKcqCB5b3UgY296eSBhdXR1bW4gZGF5cyDwn46DIGZpbGxlZCB3aXRoIGNvbG9yZnVsIGxlYXZlcyDwn42BICIKdGV4dF9qYSA8LSAi6Imy6a6u44KE44GL44Gq6JGJIPCfjYEg44Go44Go44KC44Gr44CB44G744Gj44GT44KK44Go44GX44Gf56eL44Gu5pel44CF44KSIOKcqCDjgYrpgY7jgZTjgZfjgY/jgaDjgZXjgYTwn46DIO+8gSIKdGV4dF9mYSA8LSAi2KLYsdiy2YjbjCDinKgg2LHZiNiy2YfYp9uMINm+2KfbjNuM2LLbjCDYr9mG2Kwg8J+OgyDYqNix2KfbjCDYtNmF2KfYjCDZvtixINin2LIg2KjYsdqv4oCM2YfYp9uMINix2Ybar9in2LHZhtqvIPCfjYEhIgpgYGAKCiMjIOe1teaWh+Wtl+ODu+ijhemjvuaWh+Wtl+OBruaknOe0ogotIDxhIGhyZWY9Imh0dHBzOi8vd3d3LnJkb2N1bWVudGF0aW9uLm9yZy9wYWNrYWdlcy9iYXNlL3ZlcnNpb25zLzMuNi4yL3RvcGljcy9ncmVwIiB0YXJnZXQ9Il9ibGFuayI+Z3N1YjogUGF0dGVybiBNYXRjaGluZyBhbmQgUmVwbGFjZW1lbnQ8L2E+Ci0gPGEgaHJlZj0iaHR0cHM6Ly93d3cucmRvY3VtZW50YXRpb24ub3JnL3BhY2thZ2VzL2Jhc2UvdmVyc2lvbnMvMy42LjIvdG9waWNzL3JlZ21hdGNoZXMiIHRhcmdldD0iX2JsYW5rIj5yZWdtYXRjaGVzOiBFeHRyYWN0IG9yIFJlcGxhY2UgTWF0Y2hlZCBTdWJzdHJpbmdzPC9hPgoKYGBge3J9CnVuaV9lbW9qaV9wYXR0ZXJuIDwtICJbXFV7MUY2MDB9LVxVezFGNjRGfVxVezFGMzAwfS1cVXsxRjVGRn1cVXsxRjY4MH0tXFV7MUY2RkZ9XFV7MUYxRTB9LVxVezFGMUZGfVxVezI2MDB9LVxVezI2RkZ9XFV7MjcwMH0tXFV7MjdCRn1dIgoKI3JldHVybiB0aGUgZW1vamkgcG9zaXRpb25zIGluIGEgZ2l2ZW4gdGV4dAplbW9qaV9wb3NpdGlvbnMgPC0gZ3JlZ2V4cHIodW5pX2Vtb2ppX3BhdHRlcm4sIHRleHRfZW4pCgojZXh0cmFjdCB0aGUgbWF0Y2hlZCBlbW9qaXMKcmVnbWF0Y2hlcyh0ZXh0X2VuLCBlbW9qaV9wb3NpdGlvbnMpCmBgYAoKIyMg57W15paH5a2X44O76KOF6aO+5paH5a2X44Gu5YmK6ZmkCmBgYHtyfQpnc3ViKHVuaV9lbW9qaV9wYXR0ZXJuLCAiIiwgdGV4dF9lbikKYGBgCgojIyDjgIzntbXmloflrZfjg7voo4Xpo77mloflrZfliYrp
maTjgI3plqLmlbDkvZzmiJAKYGBge3J9CnJlbW92ZV9lbW9qaXMgPC0gZnVuY3Rpb24odHh0KSB7CiAgICBlbW9qaV9wYXR0ZXJuIDwtICJbXFV7MUY2MDB9LVxVezFGNjRGfVxVezFGMzAwfS1cVXsxRjVGRn1cVXsxRjY4MH0tXFV7MUY2RkZ9XFV7MUYxRTB9LVxVezFGMUZGfVxVezI2MDB9LVxVezI2RkZ9XFV7MjcwMH0tXFV7MjdCRn1dIgogICAgZ3N1YihlbW9qaV9wYXR0ZXJuLCAiIiwgdHh0KQp9CmBgYAoKIyMjIFJ1biB0aGUgZnVuY3Rpb24gd2l0aCB0ZXh0X2VuCmBgYHtyfQpyZW1vdmVfZW1vamlzKHRleHRfZW4pCmBgYAojIyMgUnVuIHRoZSBmdW5jdGlvbiB3aXRoIHRleHRfamEKYGBge3J9CnJlbW92ZV9lbW9qaXModGV4dF9qYSkKYGBgCiMjIyBSdW4gdGhlIGZ1bmN0aW9uIHdpdGggdGV4dF9mYQpgYGB7cn0KcmVtb3ZlX2Vtb2ppcyh0ZXh0X2ZhKQpgYGAKCiMg5L2/55So6KiA6Kqe44Gu54m55a6aOiA8YSBocmVmPSJodHRwczovL2dpdGh1Yi5jb20vZ29vZ2xlL2NsZDMiIHRhcmdldD0iX2JsYW5rIj5Db21wYWN0IExhbmd1YWdlIERldGVjdG9yIHYzIChDTEQzKTwvYT4KLSA8YSBocmVmPSJodHRwczovL2RvY3Mucm9wZW5zY2kub3JnL2NsZDMvIiB0YXJnZXQ9Il9ibGFuayI+UiBXcmFwcGVyIGZvciBHb29nbGXigJlzIENvbXBhY3QgTGFuZ3VhZ2UgRGV0ZWN0b3IgMzwvYT4KYGBge3IsIGV2YWw9RkFMU0V9Cmluc3RhbGwucGFja2FnZXMoImNsZDMiKQppbnN0YWxsLnBhY2thZ2VzKCJqc29ubGl0ZSIpCmBgYAoKYGBge3J9CmxpYnJhcnkoY2xkMykKbGlicmFyeShqc29ubGl0ZSkKYGBgCgojIyBFeHRyYWN0IElTTyA2MzktMSBMYW5ndWFnZSBjb2RlCmBgYHtyfQpsYW5nX2NvZGUgPC0gZGV0ZWN0X2xhbmd1YWdlKHRleHRfZW4pCmBgYAoKIyMgRXh0cmFjdCBMYW5ndWFnZSBuYW1lIAotIDxhIGhyZWY9Imh0dHBzOi8vZ2l0aHViLmNvbS91bmljb2RlLWNsZHIvY2xkci1sb2NhbGVuYW1lcy1tb2Rlcm4vYmxvYi9tYXN0ZXIvbWFpbi9lbi9sYW5ndWFnZXMuanNvbiIgdGFyZ2V0PSJfYmxhbmsiPiJsYW5ndWFnZXMuanNvbiI8L2E+IGZpbGUKYGBge3J9Cmxhbmd1YWdlc19qc29uIDwtIGZyb21KU09OKCJjbGRyLWxvY2FsZW5hbWVzLW1vZGVybi9sYW5ndWFnZXMuanNvbiIpCgojTWFwIGNvZGVzIHRvIG5hbWVzIHVzaW5nIHRoZSBKU09OIGRhdGEKbGFuZ3VhZ2VfbmFtZSA8LSBsYW5ndWFnZXNfanNvbiRtYWluJGVuJGxvY2FsZURpc3BsYXlOYW1lcyRsYW5ndWFnZXNbW2xhbmdfY29kZV1dCmxhbmd1YWdlX25hbWUKYGBgCiMjIOODhuOCreOCueODiOWHpueQhjogVURQaXBlCiMjIyDlj4LogIPos4fmlpkKLSA8YSBocmVmPSJodHRwczovL2xpbmRhdC5tZmYuY3VuaS5jei9zZXJ2aWNlcy91ZHBpcGUvIiB0YXJnZXQ9Il9ibGFuayI+TElOREFUIFJFU1Qgc2VydmljZTwvYT4KLSA8YSBocmVmPSJodHRwczovL3VmYWwubWZmLmN1bmkuY3ovdWRwaXBlIiB0YXJnZXQ9Il9ibGFuayI+VURQaXBlIFZlcnNpb25zPC9hPgotIDxh
IGhyZWY9Imh0dHBzOi8vdW5pdmVyc2FsZGVwZW5kZW5jaWVzLm9yZy8iIHRhcmdldD0iX2JsYW5rIj5Vbml2ZXJzYWwgRGVwZW5kZW5jaWVzPC9hPgoKIyA8YSBocmVmPSJodHRwczovL2Jub3NhYy5naXRodWIuaW8vdWRwaXBlL2VuL2luZGV4Lmh0bWwiIHRhcmdldD0iX2JsYW5rIj5VRFBpcGUgTmF0dXJhbCBMYW5ndWFnZSBQcm9jZXNzaW5nPC9hPgoKIyMjIOODqeOCpOODluODqeODquOBruiqreOBv+i+vOOBvwpgYGB7cn0KbGlicmFyeSh1ZHBpcGUpCmBgYAoKIyMjIEFubm90YXRpb24gd2l0aCBVRFBpcGUKYGBge3J9CnBhcnNlZF9zZW50ZW5jZSA8LSB1ZHBpcGUocmVtb3ZlX2Vtb2ppcyh0ZXh0X2VuKSwgdG9sb3dlcihsYW5nX25hbWUpKQpoZWFkKHBhcnNlZF9zZW50ZW5jZSkKYGBgCiMjIyBWaWV36Zai5pWwCmBgYHtyLCBldmFsID0gRkFMU0V9ClZpZXcocGFyc2VkX3NlbnRlbmNlKQpgYGAKCiMjIyDliJflkI3jgpLmir3lh7oKYGBge3J9CmNvbG5hbWVzKHBhcnNlZF9zZW50ZW5jZSkKYGBgCgojIyMgaGVhZApgYGB7cn0KaGVhZChwYXJzZWRfc2VudGVuY2VbYygidG9rZW5faWQiLCAidG9rZW4iLCAiaGVhZF90b2tlbl9pZCIpXSkKYGBgCgojIyBXb3JkIEZyZWNpZW5jaWVzCmBgYHtyfQpmcmVxQnlVRFBpcGVfbGVtbWE8LXRhYmxlKHBhcnNlZF9zZW50ZW5jZSRsZW1tYSkKZnJlcUJ5VURQaXBlX2xlbW1hCmBgYAojIyBEYXRhIEZvcm1hdHRpbmcKIyMjIOWPguiAg+izh+aWmQotIDxhIGhyZWY9Imh0dHBzOi8vaHRzdWRhLm5ldC9zdGF0cy9kYXRhLWJhc2ljcy5odG1sI2RhdGEtYmFzaWNzLWRhdGEtc3RydWN0dXJlIiB0YXJnZXQ9Il9ibGFuayI+UuOBruODh+ODvOOCv+ani+mAoDwvYT4KCiMjIyBDaGVjayB0aGUgY2xhc3MgJiB0eXBlIG9mIGZyZXFCeVVEUGlwZV9sZW1tYQpgYGB7cn0KY2xhc3MoZnJlcUJ5VURQaXBlX2xlbW1hKQp0eXBlb2YoZnJlcUJ5VURQaXBlX2xlbW1hKQpgYGAKIyMjIENvbnZlcnQgdGhlIHRhYmxlIHRvIGEgZGF0YSBmcmFtZSBmb3JtYXQKYGBge3J9CmZyZXFEYXRhIDwtIGRhdGEuZnJhbWUoZnJlcUJ5VURQaXBlX2xlbW1hKQpgYGAKIyMjIENoZWNrIHRoZSBjbGFzcyAmIHR5cGUgb2YgZnJlcURhdGEKYGBge3J9CmNsYXNzKGZyZXFEYXRhKQp0eXBlb2YoZnJlcURhdGEpCmBgYAoKIyMgUHV0IHRoZSBmaXJzdCBjb2x1bW4ncyB2YWx1ZXMgaW50byB0aGUgcm93bmFtZXMKYGBge3J9CmNvbG5hbWVzKGZyZXFEYXRhKQpyb3duYW1lcyhmcmVxRGF0YSkgPC0gZnJlcURhdGEkVmFyMQpoZWFkKGZyZXFEYXRhKQpgYGAKCiMjIERlbGV0ZSBhIHNwZWNpZmljIGNvbHVtbgpgYGB7cn0KZnJlcURhdGEgPC0gZnJlcURhdGFbLTFdCmhlYWQoZnJlcURhdGEpCmBgYAoKIyMg5L+C5Y+X44GR6Kej5p6Q44Gu6KaW6Kaa5YyWCi0gcmVmLiA8YSBocmVmPSJodHRwczovL3d3dy5yLWJsb2dnZXJzLmNvbS8yMDE5LzA3L2RlcGVuZGVuY3ktcGFyc2luZy13aXRoLXVkcGlwZS8iIHRhcmdldD0iX2JsYW5rIj5kZXBl
bmRlbmN5IHBhcnNpbmcgd2l0aCB1ZHBpcGU8L2E+CgotIDxhIGhyZWY9Imh0dHBzOi8vZ2l0aHViLmNvbS9ibm9zYWMvdGV4dHBsb3QiIHRhcmdldD0iX2JsYW5rIj50ZXh0cGxvdDwvYT4KCiMjIyDplqLmlbDjg5XjgqHjgqTjg6vjga7oqq3jgb/ovrzjgb8KYGBge3J9CnNvdXJjZSgiZnVuY19wbG90X2Fubm90YXRpb24uUiIpCmBgYAoKIyMg6KaW6Kaa5YyWCmBgYHtyfQpwbG90X2Fubm90YXRpb24ocGFyc2VkX3NlbnRlbmNlLCBzaXplID0gNCkKYGBgCgojIOiqsumhjO+8ke+8iOe3oOOCgeWIh+OCijEx5pyINeaXpe+8iQojIyDlhaXlipvmlofjga7kv4Ljgorlj5fjgZHop6PmnpDntZDmnpzjgpLmj4/nlLvlh7rlipvjgZnjgovplqLmlbDjgpLkvZzmiJDjgZfjgabjgY/jgaDjgZXjgYQKLSDmnaHku7YxOiDlhaXlipvmlofjgavntbXjg7voo4Xpo77mloflrZfjgYzlkKvjgb7jgozjgabjgYTjgovloLTlkIjjga/jgIHpmaTljrvjgZnjgosKLSDmnaHku7YyOiDplqLmlbDjga7lvJXmlbDjga/mloflrZfliJcKLSBOT1RFOiDplqLmlbDjga7lkI3liY3jga/oh6rnlLHjgavku5jjgZHjgabjgY/jgaDjgZXjgYTjgILoqrLpoYzjgYzjgafjgY3jgZ/jgonjgIHjg6Hjg7zjg6vjgafpgKPntaHjgZfjgabjgY/jgaDjgZXjgYTjgILvvIjpgKPntaHlvozjgIFwb3NpdOS4iuOBp+OCs+ODvOODieOBqOOAgeWun+ihjOe1kOaenOOCkueiuuiqjeOBl+OBvuOBme+8iQoKIyMg6Zai5pWw44Gu5a6f6KGM5L6LMQpgYGB7cn0Kc291cmNlKCJkZXBfcmVsYXRpb25fdml6LlIiKQoKZGVwX3JlbGF0aW9uX3Zpeih0ZXh0X2VuKQpgYGAKIyMg6Zai5pWw44Gu5a6f6KGM5L6LMgpgYGB7cn0KdGV4dF9mciA8LSAiSmUgdm91cyBzb3VoYWl0ZSDinKggZGVzIGpvdXJuw6llcyBkJ2F1dG9tbmUgZG91aWxsZXR0ZXMg8J+OgyByZW1wbGllcyBkZSBmZXVpbGxlcyBjb2xvcsOpZXMg8J+NgSEiCmRlcF9yZWxhdGlvbl92aXoodGV4dF9mcikKYGBgCgoK