Lecture2: 基本統計量 (Manipulate関数)

作業ディレクトリの確認

getwd()
## [1] "/cloud/project"

基本操作: リスト

c(1, 2, 3, 4, 5)
## [1] 1 2 3 4 5
c(1, 2, 3, 4, 5)*2
## [1]  2  4  6  8 10

基本操作: 変数

# Store a numeric vector in Y.
Y <- c(1, 2, 3, 4, 5)
# Double every element and keep the result in Z.
Z <- Y * 2
# Extract the first element (R indexes from 1).
Y[1]
## [1] 1

基本操作: 関数の利用

# Character vector holding three strings of increasing length.
str <- c("a", "ab", "abc")
# Number of elements in the vector.
length(str)
## [1] 3
# Number of characters in each element.
nchar(str)
## [1] 1 2 3

テキストの頻度表作成

テキストファイルの読み込み

一行ずつ読み込んで、リストに格納

# Read shiny.txt into a character vector, one element per line.
txt <- readLines("shiny.txt")

結果出力

## [1] "Shiny is an R package that makes it easy to build interactive web apps straight from R. "             
## [2] "You can host standalone apps on a webpage or embed them in R Markdown documents or build dashboards. "
## [3] "You can also extend your Shiny apps with CSS themes, htmlwidgets, and JavaScript actions."

一行目の内容

txt[1]
## [1] "Shiny is an R package that makes it easy to build interactive web apps straight from R. "

読み込んだ行数

length(txt)
## [1] 3

スペース&記号による分割

Punctuation characters:
! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~.
# Split every line at each single whitespace or punctuation character.
# Two adjacent delimiters (e.g. ", ") leave an empty string between them.
wordLst <- strsplit(txt, split = "[[:space:]]|[[:punct:]]")

結果出力

wordLst
## [[1]]
##  [1] "Shiny"       "is"          "an"          "R"           "package"    
##  [6] "that"        "makes"       "it"          "easy"        "to"         
## [11] "build"       "interactive" "web"         "apps"        "straight"   
## [16] "from"        "R"           ""           
## 
## [[2]]
##  [1] "You"        "can"        "host"       "standalone" "apps"      
##  [6] "on"         "a"          "webpage"    "or"         "embed"     
## [11] "them"       "in"         "R"          "Markdown"   "documents" 
## [16] "or"         "build"      "dashboards" ""          
## 
## [[3]]
##  [1] "You"         "can"         "also"        "extend"      "your"       
##  [6] "Shiny"       "apps"        "with"        "CSS"         "themes"     
## [11] ""            "htmlwidgets" ""            "and"         "JavaScript" 
## [16] "actions"

各行のデータを一括化

# Flatten the per-line list into a single character vector.
wordLst <- unlist(wordLst)

結果出力

##  [1] "Shiny"       "is"          "an"          "R"           "package"    
##  [6] "that"        "makes"       "it"          "easy"        "to"         
## [11] "build"       "interactive" "web"         "apps"        "straight"   
## [16] "from"        "R"           ""            "You"         "can"        
## [21] "host"        "standalone"  "apps"        "on"          "a"          
## [26] "webpage"     "or"          "embed"       "them"        "in"         
## [31] "R"           "Markdown"    "documents"   "or"          "build"      
## [36] "dashboards"  ""            "You"         "can"         "also"       
## [41] "extend"      "your"        "Shiny"       "apps"        "with"       
## [46] "CSS"         "themes"      ""            "htmlwidgets" ""           
## [51] "and"         "JavaScript"  "actions"

小文字に変換

# Lower-case every word so that e.g. "Shiny" and "shiny" are counted together.
wordLst <- tolower(wordLst)

結果出力

##  [1] "shiny"       "is"          "an"          "r"           "package"    
##  [6] "that"        "makes"       "it"          "easy"        "to"         
## [11] "build"       "interactive" "web"         "apps"        "straight"   
## [16] "from"        "r"           ""            "you"         "can"        
## [21] "host"        "standalone"  "apps"        "on"          "a"          
## [26] "webpage"     "or"          "embed"       "them"        "in"         
## [31] "r"           "markdown"    "documents"   "or"          "build"      
## [36] "dashboards"  ""            "you"         "can"         "also"       
## [41] "extend"      "your"        "shiny"       "apps"        "with"       
## [46] "css"         "themes"      ""            "htmlwidgets" ""           
## [51] "and"         "javascript"  "actions"

空白""の削除(どちらか好きなほう)

# Remove the empty strings produced by the split (either form works).
# wordLst <- wordLst[nchar(wordLst) > 0]
wordLst <- wordLst[wordLst != ""]

結果出力

##  [1] "shiny"       "is"          "an"          "r"           "package"    
##  [6] "that"        "makes"       "it"          "easy"        "to"         
## [11] "build"       "interactive" "web"         "apps"        "straight"   
## [16] "from"        "r"           "you"         "can"         "host"       
## [21] "standalone"  "apps"        "on"          "a"           "webpage"    
## [26] "or"          "embed"       "them"        "in"          "r"          
## [31] "markdown"    "documents"   "or"          "build"       "dashboards" 
## [36] "you"         "can"         "also"        "extend"      "your"       
## [41] "shiny"       "apps"        "with"        "css"         "themes"     
## [46] "htmlwidgets" "and"         "javascript"  "actions"

単語のToken数

# Token count: the total number of words, repeats included.
tokens <- length(wordLst)

結果出力

## [1] 49

単語のTypes数

  • unique()関数は,リストの重複しない要素を返す
# Type count: the number of distinct words (duplicates dropped by unique()).
types <- length(unique(wordLst))

結果出力

## [1] 40

TTR: Type-Token Ratioの計算

\[TTR=\frac{types}{tokens} \times 100 \]

types/tokens*100
## [1] 81.63265
小数点以下2桁で結果を出力
# Type-token ratio as a percentage, rounded to two decimal places.
TTR <- round(types / tokens * 100, digits = 2)

結果出力

## [1] 81.63

頻度数の集計(頻度順でソート)

# Count the occurrences of each word and order them from most to least frequent.
freq <- sort(table(wordLst), decreasing = TRUE)

結果出力

## wordLst
##        apps           r       build         can          or       shiny 
##           3           3           2           2           2           2 
##         you           a     actions        also          an         and 
##           2           1           1           1           1           1 
##         css  dashboards   documents        easy       embed      extend 
##           1           1           1           1           1           1 
##        from        host htmlwidgets          in interactive          is 
##           1           1           1           1           1           1 
##          it  javascript       makes    markdown          on     package 
##           1           1           1           1           1           1 
##  standalone    straight        that        them      themes          to 
##           1           1           1           1           1           1 
##         web     webpage        with        your 
##           1           1           1           1

相対頻度数

全体を1としたときの出現率

# Relative frequency: each count divided by the total, so the values sum to 1.
relative <- freq / sum(freq)

結果出力

## wordLst
##        apps           r       build         can          or       shiny 
##  0.06122449  0.06122449  0.04081633  0.04081633  0.04081633  0.04081633 
##         you           a     actions        also          an         and 
##  0.04081633  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816 
##         css  dashboards   documents        easy       embed      extend 
##  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816 
##        from        host htmlwidgets          in interactive          is 
##  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816 
##          it  javascript       makes    markdown          on     package 
##  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816 
##  standalone    straight        that        them      themes          to 
##  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816 
##         web     webpage        with        your 
##  0.02040816  0.02040816  0.02040816  0.02040816

練習:小数点以下3桁で結果を出力

結果出力

relative
## wordLst
##        apps           r       build         can          or       shiny 
##       0.061       0.061       0.041       0.041       0.041       0.041 
##         you           a     actions        also          an         and 
##       0.041       0.020       0.020       0.020       0.020       0.020 
##         css  dashboards   documents        easy       embed      extend 
##       0.020       0.020       0.020       0.020       0.020       0.020 
##        from        host htmlwidgets          in interactive          is 
##       0.020       0.020       0.020       0.020       0.020       0.020 
##          it  javascript       makes    markdown          on     package 
##       0.020       0.020       0.020       0.020       0.020       0.020 
##  standalone    straight        that        them      themes          to 
##       0.020       0.020       0.020       0.020       0.020       0.020 
##         web     webpage        with        your 
##       0.020       0.020       0.020       0.020

ファイルに出力

# Save the frequency table as a CSV file (path is relative to the working directory).
write.csv(freq, file = "freq_shiny.csv")

単語頻度数分布

# Bar chart of the word frequencies; las = 3 draws the axis labels vertically.
barplot(freq, las = 3)

単語頻度数分布(色付き)

las: 軸ラベルの向き (style of axis labels)。0=軸に平行, 1=常に水平, 2=軸に垂直, 3=常に垂直

# Fill colours for the bars; barplot() recycles them across all bars.
colors <- c("red", "blue", "green")
barplot(freq, las = 3, col = colors)

関数の作成

平方根

# Input value whose square root is computed.
arg <- 4
# NOTE: this binds a numeric variable named `sqrt`, the same name as the
# built-in function. Calls of the form sqrt(x) still reach the function, but a
# bare reference to `sqrt` now yields this number.
sqrt <- sqrt(arg)
# paste() joins its arguments with a single space by default.
paste("平方根は",sqrt)
## [1] "平方根は 2"

返り(戻り)値なし関数(return()を書かない関数):printSQRT関数を作成 ※Rでは最後に評価した式の値が自動的に戻り値になる

# Build a message reporting the square root of `arg`.
#
# arg: numeric value passed to sqrt().
# Returns the message string. It is the last expression evaluated, so it is
# auto-printed when the function is called at the console.
printSQRT <- function(arg) {
  root <- sqrt(arg)
  label <- paste(arg, "の平方根は: ")
  paste(label, root)
}

printSQRT関数の実行

printSQRT(7)
## [1] "7 の平方根は:  2.64575131106459"

返り(戻り)値あり関数:printSQRTr関数を作成

# Print a message reporting the square root of `arg` and return that root.
#
# arg: numeric value passed to sqrt().
# Returns the numeric square root of `arg`, so the caller can store and reuse it.
printSQRTr <- function(arg) {
  value <- sqrt(arg)
  msg <- paste(arg, "の平方根は: ")
  print(paste(msg, value))
  # Return the locally computed root. A bare `sqrt` here would refer to
  # whatever object of that name exists outside the function, not to `value`.
  return(value)
}

printSQRTr関数の実行

a<-printSQRTr(7)
## [1] "7 の平方根は:  2.64575131106459"
a
## [1] 2

べき乗

2^4
## [1] 16

戻り値あり関数:べき乗関数を作成

# Raise `arg1` to the power `arg2`.
#
# arg1: numeric base.
# arg2: numeric exponent.
# Returns arg1^arg2.
calcPower <- function(arg1, arg2) {
  arg1^arg2
}

べき乗関数を実行

calcPower(2,5)
## [1] 32

練習: RTTR(Root Type-Token Ratio) Giraudの値を求める

\[RTTR=\frac{types}{\sqrt{tokens}} \]

お遊戯1

# Draw a single point at the origin using plotting symbol 8.
plot(0, 0, pch = 8)

# Same symbol, enlarged five times via cex.
plot(0, 0, pch = 8, cex = 5)

# Same enlarged symbol, drawn in red.
plot(0, 0, pch = 8, cex = 5, col = "red")