Lecture2: 基本統計量, Manipulate関数

作業ディレクトリの確認

getwd()

基本操作

2+3

## [1] 5

n1<-2
n2<-3
n1*n2

## [1] 6

lst <- c(2,3)
lst[1]*lst[2]

## [1] 6

平方根

arg <- 4
# Keep the result under its own name instead of reusing the name of base::sqrt().
root <- sqrt(arg)
# Report the computed root, not the input value.
paste("平方根は", root)

## [1] "平方根は 2"

printSQRT関数を作成

# Build a message that reports the square root of `arg`.
#   arg: numeric value whose square root is taken with sqrt().
# Returns the character string produced by paste("平方根は", <root>).
printSQRT <- function(arg) {
    root <- sqrt(arg)
    # Paste the computed root rather than the input value.
    paste("平方根は", root)
}

printSQRT関数ファイルの読み込み

source("printSQRT.R")
printSQRT(7)

## [1] "平方根は..... 7"

べき乗

2^3

## [1] 8

べき乗関数を作成

# Raise `arg1` to the power `arg2`.
#   arg1: base
#   arg2: exponent
# Returns arg1^arg2 (the value of the last expression).
calcPower <- function(arg1, arg2) {
    arg1^arg2
}

べき乗関数を実行

calcPower(2,5)

## [1] 32

テキストの頻度表作成

テキストファイルの読み込み

一行ずつ読み込んで、リストに格納

txt<-readLines("shiny.txt")

## [1] "Shiny is an R package that makes it easy to build interactive web apps straight from R. "             
## [2] "You can host standalone apps on a webpage or embed them in R Markdown documents or build dashboards. "
## [3] "You can also extend your Shiny apps with CSS themes, htmlwidgets, and JavaScript actions."

一行目の内容

txt[1]

## [1] "Shiny is an R package that makes it easy to build interactive web apps straight from R. "

読み込んだ行数

length(txt)

## [1] 3

スペース&記号による分割

Punctuation characters:
! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~.

wordLst<-strsplit(txt,"[[:space:]]|[[:punct:]]")

## [[1]]
##  [1] "Shiny"       "is"          "an"          "R"           "package"    
##  [6] "that"        "makes"       "it"          "easy"        "to"         
## [11] "build"       "interactive" "web"         "apps"        "straight"   
## [16] "from"        "R"           ""           
## 
## [[2]]
##  [1] "You"        "can"        "host"       "standalone" "apps"      
##  [6] "on"         "a"          "webpage"    "or"         "embed"     
## [11] "them"       "in"         "R"          "Markdown"   "documents" 
## [16] "or"         "build"      "dashboards" ""          
## 
## [[3]]
##  [1] "You"         "can"         "also"        "extend"      "your"       
##  [6] "Shiny"       "apps"        "with"        "CSS"         "themes"     
## [11] ""            "htmlwidgets" ""            "and"         "JavaScript" 
## [16] "actions"

各行のデータを一括化

wordLst<-unlist(wordLst)

##  [1] "Shiny"       "is"          "an"          "R"           "package"    
##  [6] "that"        "makes"       "it"          "easy"        "to"         
## [11] "build"       "interactive" "web"         "apps"        "straight"   
## [16] "from"        "R"           ""            "You"         "can"        
## [21] "host"        "standalone"  "apps"        "on"          "a"          
## [26] "webpage"     "or"          "embed"       "them"        "in"         
## [31] "R"           "Markdown"    "documents"   "or"          "build"      
## [36] "dashboards"  ""            "You"         "can"         "also"       
## [41] "extend"      "your"        "Shiny"       "apps"        "with"       
## [46] "CSS"         "themes"      ""            "htmlwidgets" ""           
## [51] "and"         "JavaScript"  "actions"

小文字に変換

wordLst<-tolower(wordLst)

##  [1] "shiny"       "is"          "an"          "r"           "package"    
##  [6] "that"        "makes"       "it"          "easy"        "to"         
## [11] "build"       "interactive" "web"         "apps"        "straight"   
## [16] "from"        "r"           ""            "you"         "can"        
## [21] "host"        "standalone"  "apps"        "on"          "a"          
## [26] "webpage"     "or"          "embed"       "them"        "in"         
## [31] "r"           "markdown"    "documents"   "or"          "build"      
## [36] "dashboards"  ""            "you"         "can"         "also"       
## [41] "extend"      "your"        "shiny"       "apps"        "with"       
## [46] "css"         "themes"      ""            "htmlwidgets" ""           
## [51] "and"         "javascript"  "actions"

空文字列“”の削除(どちらか好きなほう)

wordLst<-wordLst[nchar(wordLst)>0]
wordLst<- wordLst[wordLst != ""]

単語のToken数

tokens <- length(wordLst)

## [1] 49

単語のTypes数

unique()関数は，リストの重複しない要素を返す

types <- length(unique(wordLst))

## [1] 40

TTR: Type-Token Ratioの計算

\[TTR=\frac{types}{tokens} \times 100 \]

types/tokens*100

## [1] 81.63265

小数点2桁で結果を出力

TTR <- round(types/tokens*100,2)

## [1] 81.63

頻度数の集計(頻度順でソート)

freq<-sort(table(wordLst), decreasing=TRUE)

## wordLst
##        apps           r       build         can          or       shiny 
##           3           3           2           2           2           2 
##         you           a     actions        also          an         and 
##           2           1           1           1           1           1 
##         css  dashboards   documents        easy       embed      extend 
##           1           1           1           1           1           1 
##        from        host htmlwidgets          in interactive          is 
##           1           1           1           1           1           1 
##          it  javascript       makes    markdown          on     package 
##           1           1           1           1           1           1 
##  standalone    straight        that        them      themes          to 
##           1           1           1           1           1           1 
##         web     webpage        with        your 
##           1           1           1           1

相対頻度数

全体を１としたときの出現率

relative <- freq / sum(freq)

## wordLst
##        apps           r       build         can          or       shiny 
##  0.06122449  0.06122449  0.04081633  0.04081633  0.04081633  0.04081633 
##         you           a     actions        also          an         and 
##  0.04081633  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816 
##         css  dashboards   documents        easy       embed      extend 
##  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816 
##        from        host htmlwidgets          in interactive          is 
##  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816 
##          it  javascript       makes    markdown          on     package 
##  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816 
##  standalone    straight        that        them      themes          to 
##  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816  0.02040816 
##         web     webpage        with        your 
##  0.02040816  0.02040816  0.02040816  0.02040816

練習：小数点3桁で結果を出力

relative

## wordLst
##        apps           r       build         can          or       shiny 
##       0.061       0.061       0.041       0.041       0.041       0.041 
##         you           a     actions        also          an         and 
##       0.041       0.020       0.020       0.020       0.020       0.020 
##         css  dashboards   documents        easy       embed      extend 
##       0.020       0.020       0.020       0.020       0.020       0.020 
##        from        host htmlwidgets          in interactive          is 
##       0.020       0.020       0.020       0.020       0.020       0.020 
##          it  javascript       makes    markdown          on     package 
##       0.020       0.020       0.020       0.020       0.020       0.020 
##  standalone    straight        that        them      themes          to 
##       0.020       0.020       0.020       0.020       0.020       0.020 
##         web     webpage        with        your 
##       0.020       0.020       0.020       0.020

データフレーム型に変換

freqData <- data.frame(freq)
relativeData <- data.frame(relative)

##   wordLst Freq
## 1    apps    3
## 2       r    3
## 3   build    2
## 4     can    2
## 5      or    2
## 6   shiny    2

##   wordLst  Freq
## 1    apps 0.061
## 2       r 0.061
## 3   build 0.041
## 4     can 0.041
## 5      or 0.041
## 6   shiny 0.041

２つのデータフレームを連結(merge)

# Join the raw and relative frequency tables on the shared "wordLst" column.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
freqMtx <- merge(freqData, relativeData, all=TRUE, by="wordLst")

##   wordLst Freq.x Freq.y
## 1       a      1  0.020
## 2 actions      1  0.020
## 3    also      1  0.020
## 4      an      1  0.020
## 5     and      1  0.020
## 6    apps      3  0.061

列に名前をつける

names(freqMtx) <- c("term","raw", "relative")

##      term raw relative
## 1       a   1    0.020
## 2 actions   1    0.020
## 3    also   1    0.020
## 4      an   1    0.020
## 5     and   1    0.020
## 6    apps   3    0.061

csvファイルに出力

write.csv(freqMtx,"shiny_freq.csv")

単語頻度数分布

barplot(freq, las=3)

単語頻度数分布(色付き)

# Named barColors (assigned with <-) so the variable does not reuse the name
# of the colors() function.
barColors <- c("red", "blue", "green")
barplot(freq, las=3, col=barColors)

お遊戯1

plot(0,0,pch=8)

plot(0,0,pch=8,cex=5)

plot(0,0,pch=8,cex=5,col="red")

manipulate package

インタラクティブなプロット

manipulate packageがインストールされていない場合

# Install only when the package is missing, so that re-running the script
# does not reinstall it every time.
if (!requireNamespace("manipulate", quietly = TRUE)) {
  install.packages("manipulate")
}

library(manipulate)

お遊戯：色の選択

picker()関数

# Plot one point at the origin; its colour is chosen with a picker control.
manipulate(
  plot(0, 0, pch = 8, cex = 5, col = myColors),
  myColors = picker(
    "red", "yellow", "green", "violet",
    "orange", "blue", "pink", "cyan"
  )
)

お遊戯：プロットマーカーの選択

picker()関数

# Plot one point at the origin; both the colour and the marker symbol (pch)
# are chosen with picker controls.
manipulate(
  plot(0, 0, pch = myMarkers, cex = 5, col = myColors),
  myColors = picker(
    "red", "yellow", "green", "violet",
    "orange", "blue", "pink", "cyan",
    initial = "violet"
  ),
  myMarkers = picker(1, 2, 3, 4, 5, 6, 7, 8, initial = "5")
)

お遊戯：プロットサイズの選択

slider()関数

# Plot one blue point at the origin; its size (cex) is driven by a slider
# ranging from 1 to 10, starting at 5.
manipulate(
  plot(0, 0, pch = 8, cex = mySize, col = "blue"),
  mySize = slider(1, 10, initial = 5)
)

お遊戯: 文字の描画1

plot(0,0,type="n")
text(0,0, "R",cex=1,col="blue")

お遊戯編：色の選択picker()関数

manipulate関数で複数の文を実行する場合は、メイン文を{}で囲む

# Draw the letter "R" on an empty plot; its colour is chosen with a picker.
# The two drawing statements are grouped with braces into one expression.
manipulate(
  {
    plot(0, 0, type = "n")
    text(0, 0, "R", cex = 1, col = myColors)
  },
  myColors = picker(
    "red", "yellow", "green", "violet",
    "orange", "blue", "pink", "cyan"
  )
)

練習：プロットサイズの選択機能をつけてください

slider関数: 最小=1,最大=10, 初期値3

文字の描画2

“Shiny”をx=0.5, y=-0.5の位置に配置

“R”をx=0.0, y=0.5の位置に配置

plot(0,0,type="n")
text(0.5,-0.5,"Shiny",col="red",cex=1.5)
text(0.0,0.5,"R",col="blue",cex=1.5)

練習: manipulateで“Shiny”のxの位置を-1.0<=x<=1.0まで、0.2刻みで移動できるようにしてください

# Draw "Shiny" at y = -0.5 on an empty plot; its x position is driven by a
# slider from -1 to 1 in steps of 0.2, starting at 0.
manipulate(
  {
    plot(0, 0, type = "n")
    text(x, -0.5, "Shiny", col = "red", cex = 1.5)
  },
  x = slider(-1, 1, initial = 0, step = 0.2)
)

今日の課題

テキストファイル名を引数にして、単語の頻度数と相対頻度をマージした行列データを出力する関数を作成しなさい。

関数名はgetFreqMtxとし、関数ファイル（getFreqMtx.R）をメールで提出すること。
提出前に、テキストファイル"shiny.txt"を使用して、正しく実行できるかを必ず確認すること。

実行結果出力イメージ

source("getFreqMtx.R")
res<-getFreqMtx("shiny.txt")
head(res)

##      term raw relative
## 1       a   1    0.020
## 2 actions   1    0.020
## 3    also   1    0.020
## 4      an   1    0.020
## 5     and   1    0.020
## 6    apps   3    0.061