Lecture 10: network描画

前回の補足

フォームのリアクション制御: isolate()

ui.R: actionButtonの追加

sidebarPanel(
    textInput("keyword", 
              "検索キーワード:",
              value="#olympics"),
      actionButton("update", "Search"),
      uiOutput("controlColor"),

server.R: isolate()

 output$search <- renderTable({
    input$update
    res<-searchTwitter(isolate({input$keyword}), isolate({n=input$num}))
    res <- sapply(res, function(x) x$getText())
    cbind(res)
  })

キャッシュの利用: memoise()

global.R: memoise()

  getTweet <- memoise(function(term, num) {
  res<-searchTwitter(term, n=num)
  res <- sapply(res, function(x) x$getText())
  return(res)
})

server.R: global.Rの関数を利用

 output$search <- renderTable({
    input$update
    res<-getTweet(isolate({input$keyword}), isolate({n=input$num}))
    cbind(res)
  })

文字単位のn-gram:英文

test1<-"The meaning of Osaka University's motto"
substr(test1,1,4)

## [1] "The "

size=4
len<-nchar(test1)-size+1
ngramLst <- c()
for(i in 1:nchar(test1)){
  ngramLst<-rbind(ngramLst,(substr(test1,i,i+size-1)))
}

文字単位のn-gram:日本語

test2<-"大阪大学は、「大阪にも帝国大学を」という地元大阪府民の熱意と、関係者の努力により"
substr(test2,1,4)

## [1] "大阪大学"

nchar(test2)

## [1] 40

size=4
len<-nchar(test2)-size+1
ngramLst <- c()
for(i in 1:len){
 ngramLst<-rbind(ngramLst,(substr(test2,i,i+size-1)))
}

単語単位のn-gram

単語単位のリスト作成

test1<-"The meaning of Osaka University's motto"
wordLst <- strsplit(test1, "[[:space:]]|[[:punct:]]")
wordLst <- unlist(wordLst)
wordLst <- tolower(wordLst)
wordLst <- wordLst[wordLst != ""]
wordLst

## [1] "the"        "meaning"    "of"         "osaka"      "university"
## [6] "s"          "motto"

単語単位のngram

size=3
start=1
wordLst[start:(start+size-1)]

## [1] "the"     "meaning" "of"

strLst<-c()
len<-length(wordLst)-size+1
for(i in seq(1,len,size)) {
  strLst<-rbind(strLst,wordLst[i:(i+size-1)])
}
strLst

##      [,1]    [,2]         [,3]
## [1,] "the"   "meaning"    "of"
## [2,] "osaka" "university" "s"

RMecabによる日本語形態素解析

library("RMeCab")
wordLst<-unlist(RMeCabC(test2))
wordLst

##       名詞       助詞       記号       記号       名詞       助詞 
## "大阪大学"       "は"       "、"       "「"     "大阪"       "に" 
##       助詞       名詞       名詞       助詞       記号       助詞 
##       "も"     "帝国"     "大学"       "を"       "」"   "という" 
##       名詞       名詞       名詞       助詞       名詞       助詞 
##     "地元"     "大阪"     "府民"       "の"     "熱意"       "と" 
##       記号       名詞       名詞       助詞       名詞       助詞 
##       "、"     "関係"       "者"       "の"     "努力"   "により"

単語単位のngram：日本語(記号を除除)

wordLst <- wordLst[names(wordLst)!="記号"]

size=3
len<-length(wordLst)-size+1
strLst<-c()
for(i in seq(1,len,size)) {
  strLst<-rbind(strLst,wordLst[i:(i+size-1)])
}
strLst

##      名詞       助詞   名詞    
## [1,] "大阪大学" "は"   "大阪"  
## [2,] "に"       "も"   "帝国"  
## [3,] "大学"     "を"   "という"
## [4,] "地元"     "大阪" "府民"  
## [5,] "の"       "熱意" "と"    
## [6,] "関係"     "者"   "の"

ネットワーク描画

使用データ

test1<-"Osaka University: The meaning of Osaka University's motto"
wordLst <- strsplit(test1, "[[:space:]]|[[:punct:]]")
wordLst <- unlist(wordLst)
wordLst <- tolower(wordLst)
wordLst <- wordLst[wordLst != ""]

size=3
step=2
strLst<-c()
len<-length(wordLst)-size+1
for(i in seq(1,len,step)) {
  strLst<-rbind(strLst,wordLst[i:(i+size-1)])
}
strLst

##      [,1]         [,2]         [,3]        
## [1,] "osaka"      "university" "the"       
## [2,] "the"        "meaning"    "of"        
## [3,] "of"         "osaka"      "university"
## [4,] "university" "s"          "motto"

ペアデータの作成

一行のデータ

str<-strLst[1,]

pLst <- c()
for(i in 1:(length(str)-1)){
    for(j in (i+1):length(str)){
      tmp<-cbind(str[i],str[j])
      pLst<-rbind(pLst,tmp)
    }
  }
pLst

##      [,1]         [,2]        
## [1,] "osaka"      "university"
## [2,] "osaka"      "the"       
## [3,] "university" "the"

関数の作成

getPairs<-function(str){
  prLst<-c()
  for(i in 1:(length(str)-1)){
    for(j in (i+1):length(str)){
      tmp<-cbind(str[i],str[j])
      prLst<-rbind(prLst,tmp)
    }
  }
  return(prLst)
}

getPairsLst<-function(strLst){
  lst<-c()
  for(i in 1:nrow(strLst)){
    lst<-rbind(lst,getPairs(strLst[i,]))
  }

  return(lst)
}

ペアデータ

getPairs(strLst[1,])

##      [,1]         [,2]        
## [1,] "osaka"      "university"
## [2,] "osaka"      "the"       
## [3,] "university" "the"

getPairs(strLst[2,])

##      [,1]      [,2]     
## [1,] "the"     "meaning"
## [2,] "the"     "of"     
## [3,] "meaning" "of"

getPairsLst(strLst)

##       [,1]         [,2]        
##  [1,] "osaka"      "university"
##  [2,] "osaka"      "the"       
##  [3,] "university" "the"       
##  [4,] "the"        "meaning"   
##  [5,] "the"        "of"        
##  [6,] "meaning"    "of"        
##  [7,] "of"         "osaka"     
##  [8,] "of"         "university"
##  [9,] "osaka"      "university"
## [10,] "university" "s"         
## [11,] "university" "motto"     
## [12,] "s"          "motto"

データフレーム型変換

paris<- data.frame(getPairsLst(strLst))
colnames(paris)<-c("Term1","Term2")

ペア頻度表

u<-unique(paris)

fMtx<-c()
for(i in 1:nrow(u)){
  freq <-nrow(paris[paris$Term1==u$Term1[i] & paris$Term2==u$Term2[i],])
  tmpRow <- cbind(as.character(u$Term1[i]), as.character(u$Term2[i]), freq)
  fMtx <- rbind(fMtx,tmpRow)
}
fMtx<-data.frame(fMtx)
colnames(fMtx)<-c("Term1","Term2","Freq")
fMtx

##         Term1      Term2 Freq
## 1       osaka university    2
## 2       osaka        the    1
## 3  university        the    1
## 4         the    meaning    1
## 5         the         of    1
## 6     meaning         of    1
## 7          of      osaka    1
## 8          of university    1
## 9  university          s    1
## 10 university      motto    1
## 11          s      motto    1

igraphを利用した描画

library(igraph)
wng<-graph.data.frame(fMtx)
plot(wng)

igraphを利用した描画

wng<-as.undirected(graph.data.frame(fMtx))
plot(wng)

igraphを利用した描画

wng<-as.undirected(graph.data.frame(fMtx))
E(wng)$weight<-fMtx$Freq
deg<-degree(wng)
plot(wng,edge.width=E(wng)$weight,vertex.size=30*(deg/max(deg)))

netwkアプリケーションの起動(d3Networkを利用)

  library(shiny)
  runApp("netwk")

前回の補足

フォームのリアクション制御: isolate()

ui.R: actionButtonの追加

server.R: isolate()

キャッシュの利用: memoise()

global.R: memoise()

server.R: global.Rの関数を利用

文字単位のn-gram:英文

文字単位のn-gram:日本語

単語単位のn-gram

単語単位のリスト作成

単語単位のngram

RMecabによる日本語形態素解析

単語単位のngram：日本語(記号を除除)

ネットワーク描画

使用データ

ペアデータの作成

一行のデータ

関数の作成

ペアデータ

データフレーム型変換

ペア頻度表

igraphを利用した描画

igraphを利用した描画

igraphを利用した描画

netwkアプリケーションの起動(d3Networkを利用)

最終課題

Shinyで簡単なアプリケーションを作成

期限：2月4日

アプリケーションフォルダーを圧縮して、メールで提出すること。