Lecture7: apply関数

準備
dplyr, DTパッケージのインストール

dplyrパッケージ: full_join関数（merge関数の代用）

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

wordcloud package

library(wordcloud)

## Loading required package: RColorBrewer

apply()関数：複数のオブジェクトに同じ操作をする

準備

source("getFreqMtx.R")
freqMtx<-getFreqMtx("shiny.txt")
freqMtx<-freqMtx[order(freqMtx$raw, decreasing = TRUE),]
rownames(freqMtx)

##  [1] "apps"        "r"           "build"       "can"         "or"         
##  [6] "shiny"       "you"         "a"           "actions"     "also"       
## [11] "an"          "and"         "css"         "dashboards"  "documents"  
## [16] "easy"        "embed"       "extend"      "from"        "host"       
## [21] "htmlwidgets" "in"          "interactive" "is"          "it"         
## [26] "javascript"  "makes"       "markdown"    "on"          "package"    
## [31] "standalone"  "straight"    "that"        "them"        "themes"     
## [36] "to"          "web"         "webpage"     "with"        "your"

データの一部を抽出

tmp<-rownames(freqMtx)[1:10]

lapply関数

nchar
paste

lapply(tmp, nchar)
lapply(tmp, paste, "@ShintTxt")

sapply関数

sapply(tmp, nchar)

##    apps       r   build     can      or   shiny     you       a actions    also 
##       4       1       5       3       2       5       3       1       7       4

sapply(tmp, paste, "@ShintTxt")

##                apps                   r               build                 can 
##    "apps @ShintTxt"       "r @ShintTxt"   "build @ShintTxt"     "can @ShintTxt" 
##                  or               shiny                 you                   a 
##      "or @ShintTxt"   "shiny @ShintTxt"     "you @ShintTxt"       "a @ShintTxt" 
##             actions                also 
## "actions @ShintTxt"    "also @ShintTxt"

データの一部を抽出

tmp2 <- freqMtx[1:5,]

apply関数

apply(tmp2, 1, sum)

##  apps     r build   can    or 
## 3.075 3.075 2.050 2.050 2.050

apply(tmp2, 2, sum)

##      raw relative 
##     12.0      0.3

apply(tmp2, c(1,2), sqrt)

##            raw  relative
## apps  1.732051 0.2738613
## r     1.732051 0.2738613
## build 1.414214 0.2236068
## can   1.414214 0.2236068
## or    1.414214 0.2236068

function()で操作内容を書く

apply(tmp2, c(1,2), function(x) x*10)

##       raw relative
## apps   30     0.75
## r      30     0.75
## build  20     0.50
## can    20     0.50
## or     20     0.50

同一ディレクトリの複数フォルダから出現単語行列を作成

source("getFreqMtx2.R")

ディレクトリ“testData”から出現単語行列を作成

# ディレクトリ名
dirName <- "testData"

# 指定ディレクトリのファイル一覧を取得
files <- list.files(dirName)

作業ディレクトリからのファイルの相対参照パスを作成

ディレクトリとファイル名を連結 ex.“testData/test1.txt”

filesDir <- unlist(lapply(dirName, paste, files, sep = "/"))
filesDir

## [1] "testData/test1.txt" "testData/test2.txt" "testData/test3.txt"
## [4] "testData/test4.txt"

頻度行列の列名として使用

fnames<-unlist(lapply(files, function(x) unlist(strsplit(x,"\\."))[1]))
fnames

## [1] "test1" "test2" "test3" "test4"

ファイルリストからの素頻度表作成

freqLst <- lapply(filesDir, getFreqMtx2)

ファイル頻度表をマージ: full_join関数

tf <- freqLst[[1]]
for (i in freqLst[-1]) tf <- full_join(tf, i, all = T, by = "term")
tf[is.na(tf)] <- 0
row.names(tf) <- tf[, 1]
tf <- tf[-1]
colnames(tf) <- fnames

結果表示

tf

##   test1 test2 test3 test4
## c    13     2     3     5
## e     7     1     1     2
## b     4     4     0     4
## a     3     2     2     4
## f     0    11     9    20
## g     0     7     7    14
## h     0     0     4     4
## d     0     0     1     1

getFreqDir関数の読み込み

source("getFreqDir.R")

univディレクトリ内の頻度表の作成

univTable <- getFreqDir("univ")

行列のサイズ

dim(univTable)

## [1] 1178    8

結果の一部表示

head(univTable)

##            hiroshima kufs kyoto osaka1 osaka2 osaka3 tokyo waseda
## to                11   17    26     11     17     11    15     35
## and                7   18    38     15     26     31    22     37
## the                7   25    36     16     31     32    50     39
## university         6    5    16     18     21     15     9     22
## hiroshima          5    0     0      0      0      0     0      0
## of                 5   18    35     10     26     30    38     34

一列目のデータ抽出

head(univTable[,1])

## [1] 11  7  7  6  5  5

head(univTable$hiroshima)

## [1] 11  7  7  6  5  5

wordcloud(色付き)

sData <- univTable$hiroshima[1:20]
sLabel <- rownames(univTable)[1:20]
wordcloud(sLabel,sData,colors=rainbow(10))

今日の課題: shiny app

“app_freqBar1”に、Barplotタブの棒グラフと同じデータを使用して、wordcloudを描画するタブを追加してしなさい。

Barplotタブの棒グラフで使用しているデータ

freq<-tf$hiroshima[1:50]
label<-rownames(tf)[1:50]

作成例

コーパス言語学B: Lecture07 (Fall 2020)

Lecture7: apply関数

dplyrパッケージ: full_join関数（merge関数の代用）

wordcloud package

apply()関数：複数のオブジェクトに同じ操作をする

準備

データの一部を抽出

lapply関数

sapply関数

データの一部を抽出

apply関数

function()で操作内容を書く

同一ディレクトリの複数フォルダから出現単語行列を作成

ディレクトリ“testData”から出現単語行列を作成

作業ディレクトリからのファイルの相対参照パスを作成

頻度行列の列名として使用

ファイルリストからの素頻度表作成

ファイル頻度表をマージ: full_join関数

結果表示

getFreqDir関数の読み込み

univディレクトリ内の頻度表の作成

行列のサイズ

結果の一部表示

一列目のデータ抽出

wordcloud(色付き)

今日の課題: shiny app

“app_freqBar1”に、Barplotタブの棒グラフと同じデータを使用して、wordcloudを描画するタブを追加してしなさい。