Lecture9: Network representation & TwitteR

Network representation

文字単位のn-gram:英文

test1 <- "I am innocent of the allegations made against me, Ghosn told the Tokyo District Court in English, adding that he has been wrongly accused and unfairly detained."
substr(test1, 1, 4)

## [1] "I am"

# Collect every character n-gram of test1 as a one-column character matrix.
size <- 4
# Number of complete n-grams: the last valid start position is
# nchar(test1) - size + 1.
len <- nchar(test1) - size + 1
# Stop at `len`, not at nchar(test1): starting an n-gram any later runs past the
# end of the string and yields truncated fragments ("ed.", "d.", ".").
if (len >= 1) {
  starts <- seq_len(len)
  # substring() is vectorised over the start/stop positions, so no loop (and no
  # repeated rbind() copying) is needed.
  ngramLst <- matrix(substring(test1, starts, starts + size - 1), ncol = 1)
} else {
  # Text shorter than `size`: there are no complete n-grams.
  ngramLst <- matrix(character(0), ncol = 1)
}

head(ngramLst)

##      [,1]  
## [1,] "I am"
## [2,] " am "
## [3,] "am i"
## [4,] "m in"
## [5,] " inn"
## [6,] "inno"

単語単位のn-gram

準備：単語単位のリスト作成

# Tokenise test1 into lower-case words.
# Break on any single whitespace or punctuation character and flatten the list
# that strsplit() returns into a plain character vector.
wordLst <- unlist(strsplit(test1, "[[:space:]]|[[:punct:]]"))
# Two delimiters in a row (e.g. ", ") leave empty strings behind; drop them and
# fold everything to lower case.
wordLst <- tolower(wordLst[wordLst != ""])
wordLst

##  [1] "i"           "am"          "innocent"    "of"          "the"        
##  [6] "allegations" "made"        "against"     "me"          "ghosn"      
## [11] "told"        "the"         "tokyo"       "district"    "court"      
## [16] "in"          "english"     "adding"      "that"        "he"         
## [21] "has"         "been"        "wrongly"     "accused"     "and"        
## [26] "unfairly"    "detained"

単語単位のn-gram

# Word-level n-grams: cut wordLst into consecutive, non-overlapping runs of
# `size` words.
size <- 3
start <- 1
wordLst[start:(start + size - 1)]

## [1] "i"        "am"       "innocent"

# Last start index at which a full run of `size` words still fits.
len <- length(wordLst) - size + 1
# The runs start at 1, 1 + size, 1 + 2 * size, ... and are contiguous, so the
# result is simply the first nRow * size words laid out row by row. Building it
# in one matrix() call avoids re-copying the matrix on every rbind(), and gives
# an empty 0 x size matrix (instead of a seq() error) when fewer than `size`
# words are available. Leftover words that do not fill a whole run are dropped.
nRow <- length(wordLst) %/% size
strLst <- matrix(wordLst[seq_len(nRow * size)], ncol = size, byrow = TRUE)
strLst

##       [,1]    [,2]       [,3]         
##  [1,] "i"     "am"       "innocent"   
##  [2,] "of"    "the"      "allegations"
##  [3,] "made"  "against"  "me"         
##  [4,] "ghosn" "told"     "the"        
##  [5,] "tokyo" "district" "court"      
##  [6,] "in"    "english"  "adding"     
##  [7,] "that"  "he"       "has"        
##  [8,] "been"  "wrongly"  "accused"    
##  [9,] "and"   "unfairly" "detained"

ネットワーク描画

ペアデータの作成

一行のデータ

# Enumerate every unordered word pair inside a single n-gram row.
str <- strLst[1, ]

# combn() produces the index pairs (1,2), (1,3), (2,3), ... in the same order
# as nested i < j loops; transposing puts one pair on each row.
# A row with fewer than two words has no pairs, so return an empty two-column
# matrix rather than iterating over 1:(length(str) - 1), which would count
# down through 0.
if (length(str) >= 2) {
  pLst <- t(combn(str, 2))
} else {
  pLst <- matrix(character(0), ncol = 2)
}
pLst

##      [,1] [,2]      
## [1,] "i"  "am"      
## [2,] "i"  "innocent"
## [3,] "am" "innocent"

netwkPairs.R（ペア単語取得用関数）のロード

source("netwkPairs.R")

ペアデータの取得

# Word pairs of the first n-gram row. getPairs() is not defined in this
# document; presumably it comes from netwkPairs.R sourced above.
getPairs(strLst[1,])

##      [,1] [,2]      
## [1,] "i"  "am"      
## [2,] "i"  "innocent"
## [3,] "am" "innocent"

# Same call for the second n-gram row.
getPairs(strLst[2,])

##      [,1]  [,2]         
## [1,] "of"  "the"        
## [2,] "of"  "allegations"
## [3,] "the" "allegations"

# Pairs for every row of strLst at once; judging by the printed result, the
# per-row pairs are stacked in row order. getPairsLst() is defined outside this
# document (presumably in netwkPairs.R).
getPairsLst(strLst)

##       [,1]       [,2]         
##  [1,] "i"        "am"         
##  [2,] "i"        "innocent"   
##  [3,] "am"       "innocent"   
##  [4,] "of"       "the"        
##  [5,] "of"       "allegations"
##  [6,] "the"      "allegations"
##  [7,] "made"     "against"    
##  [8,] "made"     "me"         
##  [9,] "against"  "me"         
## [10,] "ghosn"    "told"       
## [11,] "ghosn"    "the"        
## [12,] "told"     "the"        
## [13,] "tokyo"    "district"   
## [14,] "tokyo"    "court"      
## [15,] "district" "court"      
## [16,] "in"       "english"    
## [17,] "in"       "adding"     
## [18,] "english"  "adding"     
## [19,] "that"     "he"         
## [20,] "that"     "has"        
## [21,] "he"       "has"        
## [22,] "been"     "wrongly"    
## [23,] "been"     "accused"    
## [24,] "wrongly"  "accused"    
## [25,] "and"      "unfairly"   
## [26,] "and"      "detained"   
## [27,] "unfairly" "detained"

ペア頻度表

# Pair frequency table. getPairsFreq() is defined outside this document
# (presumably in netwkPairs.R); per the printed head it yields the columns
# Term1, Term2 and Freq.
pFreq<-getPairsFreq(strLst)
head(pFreq)

##   Term1       Term2 Freq
## 1     i          am    1
## 2     i    innocent    1
## 3    am    innocent    1
## 4    of         the    1
## 5    of allegations    1
## 6   the allegations    1

igraphを利用した描画（無向グラフ）

library(igraph)

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

# Build a graph whose edges are the word pairs in pFreq (the first two columns
# are taken as the edge endpoints), drop the edge direction, and draw it.
# graph_from_data_frame() is the current name of the deprecated
# graph.data.frame().
# NOTE(review): igraph >= 2.1 also renames as.undirected() to as_undirected();
# switch once older igraph versions no longer need to be supported.
wng <- as.undirected(graph_from_data_frame(pFreq))
plot(wng)

テキストファイルからネットワーク図作成

# Read the article line by line and tokenise it. splitWdsEn() is defined
# outside this document.
filename <- "Ghosn.txt"
txt <- splitWdsEn(readLines(filename))
head(txt)

## [1] "embattled" "former"    "nissan"    "motor"     "co"        "chairman"

# Word windows and their pair frequencies. getNstr() is defined outside this
# document; presumably `size` is the window length and `step` the stride
# between windows -- confirm against its definition.
size <- 4
step <- 2
strLst <- getNstr(txt, size, step)
pFreq <- getPairsFreq(strLst)
head(pFreq)

##       Term1  Term2 Freq
## 1 embattled former    1
## 2 embattled nissan    1
## 3 embattled  motor    1
## 4    former nissan    1
## 5    former  motor    1
## 6    nissan  motor    4

# Keep only the pairs seen at least six times.
pFreq_s <- pFreq[pFreq$Freq >= 6, ]

igraphを利用した描画（無向グラフ）

# Build a graph from the filtered pair table pFreq_s (the first two columns are
# taken as the edge endpoints), drop the edge direction, and draw it.
# graph_from_data_frame() is the current name of the deprecated
# graph.data.frame().
# NOTE(review): igraph >= 2.1 also renames as.undirected() to as_undirected();
# switch once older igraph versions no longer need to be supported.
wng <- as.undirected(graph_from_data_frame(pFreq_s))
plot(wng)

d3Networkのインストール

  install.packages("d3Network")

d3SimpleNetwork関数

# Excerpt only: `net` and `input` are not defined in this document.
# NOTE(review): input$sel_fontsize looks like a Shiny input value, so this call
# presumably lives in the server code of the app launched below -- confirm in
# app_netwk. The value is converted to numeric before being used as font size.
d3SimpleNetwork(net, width=800, height=500, 
                    standAlone=FALSE, opacity = 1.0,
                    charge = -200, fontsize = as.numeric(input$sel_fontsize),
                    parentElement = "#d3networkPlot")

netwkアプリケーションの起動(d3Networkを利用)

  # Start the Shiny application given by "app_netwk"; the path is relative, so
  # it is resolved against the current working directory.
  library(shiny)
  runApp("app_netwk")

Twitterアプリケーションの作成＆登録

OAuth 認証用

https://apps.twitter.com/

ROAuth, twitteRのインストール

  # One-time installation. The second line used to begin with a full-width
  # (U+3000) space, which the R parser accepts only in some locales; both lines
  # now use plain ASCII indentation.
  install.packages("twitteR")
  install.packages("ROAuth")

ROAuth, twitteRの読み込み

  library(twitteR)
  library(ROAuth)

twitteRからのOAuth認証

cacert.pemをダウンロード

  # Fetch the CA certificate bundle over HTTPS. A trust store downloaded over
  # plain HTTP could be tampered with in transit, which would defeat its
  # purpose.
  download.file(url = "https://curl.se/ca/cacert.pem", destfile = "cacert.pem")

認証情報(Twitterアプリケーション)

# Consumer key/secret of the registered Twitter application (masked here) and
# the three OAuth endpoints used during the handshake.
consr_key <- "***********"
consr_secrt <- "***********"
req_url <- "https://api.twitter.com/oauth/request_token"
acs_url <- "https://api.twitter.com/oauth/access_token"
auth_url <- "https://api.twitter.com/oauth/authorize"

# Bundle the credentials and endpoints into a ROAuth credential object.
cred <- OAuthFactory$new(
  consumerKey = consr_key,
  consumerSecret = consr_secrt,
  requestURL = req_url,
  accessURL = acs_url,
  authURL = auth_url
)

handshake: twitterクライアント接続

# Run the OAuth handshake, passing the downloaded cacert.pem as the CA bundle.
cred$handshake(cainfo="cacert.pem")

認証情報取得

# Access token and secret issued for the Twitter application (masked here, in
# the same way as the consumer key/secret). They were referenced but never
# assigned anywhere in this document; replace the placeholders with real values.
acs_token <- "***********"
acs_token_sec <- "***********"
setup_twitter_oauth(consr_key, consr_secrt, acs_token, acs_token_sec)

検索例

# Hashtag and keyword searches, up to 10 tweets each.
searchTwitter("#DH2019", n = 10)
searchTwitter("#阪大", n = 10)
# "Ghosn" was misspelled as "Gohsn" in the next two queries, so they did not
# search for the intended name.
searchTwitter("#Ghosn", n = 10)
searchTwitter("Ghosn + Nissan", n = 10)

# Recent tweets of a single account.
username <- "casualconc"
userTimeline(username)

詳細情報取得

# Fetch up to 10 tweets for the hashtag and show the first list element.
stweets <- searchTwitter("#ゴーン", n = 10)
stweets[1]

# twListToDF() turns the list of tweets into a data frame.
stweet.df <- twListToDF(stweets)

# Columns available for each tweet.
colnames(stweet.df)

# Creation timestamps: one per tweet ("tokens").
cDateTokens <- stweet.df[["created"]]
cDateTokens
# Number of tweets per calendar day, and the distinct days ("types").
cDateFreq <- table(as.Date(cDateTokens))
cDateTypes <- as.Date(names(cDateFreq))
cDateTokens
cDateFreq
cDateTypes

タイムライン情報

# Fetch up to 100 tweets for the hashtag.
tweets <- searchTwitter("#ゴーン", n = 100)

# twListToDF() turns the list of tweets into a data frame.
tweet.df <- twListToDF(tweets)

# Creation timestamps, tweets per calendar day, and the distinct days.
cDateTokens <- tweet.df[["created"]]
cDateFreq <- table(as.Date(cDateTokens))
cDateTypes <- as.Date(names(cDateFreq))

cDateFreq
cDateTypes

タイムライン表示1

# Select the font family for subsequent plots.
# NOTE(review): "HiraMaruProN-W4" is presumably chosen so that Japanese labels
# render; confirm it is available on the target platform.
par(family = "HiraMaruProN-W4")
# One vertical line per day, with height equal to the number of tweets that day.
plot(
  cDateTypes,
  as.numeric(cDateFreq),
  type = "h",
  ylab = "frequency"
)

タイムライン表示2

# Draw the plot without its default x axis (suppressed for this call only),
# then add a custom date axis.
plot(
  cDateTypes,
  as.numeric(cDateFreq),
  type = "h",
  ylab = "frequency",
  xaxt = "n"
)
# One tick per day across the observed range, labelled month/day and rotated
# perpendicular to the axis.
dayTicks <- seq(min(cDateTypes), max(cDateTypes), by = "days")
axis.Date(1, at = dayTicks, format = "%m/%d", las = 2)

タイムライン表示3

# Draw the plot without its default x axis (suppressed for this call only),
# then add a custom date axis.
plot(
  cDateTypes,
  as.numeric(cDateFreq),
  type = "h",
  ylab = "frequency",
  xaxt = "n"
)
# One tick per day across the observed range, labelled in Japanese month/day
# form and rotated perpendicular to the axis.
dayTicks <- seq(min(cDateTypes), max(cDateTypes), by = "days")
axis.Date(1, at = dayTicks, format = "%m月%d日", las = 2)

トレンド情報取得

# Locations for which Twitter publishes trends.
aTL <- availableTrendLocations()
head(aTL)
subset(aTL, subset = country == "Japan")
subset(aTL, subset = name == "Osaka")

# getTrends() expects the location's WOEID value. Select the column by name
# rather than passing a one-column data frame picked by position ([3]), which
# silently depends on the column order.
osaka <- subset(aTL, subset = name == "Osaka")
getTrends(osaka$woeid)