Lec11

Twitter APIの利用

Install packages: httr, jsonlite

# Install the packages this notebook attaches, but only the ones that are not
# installed yet, so re-running this chunk does not re-download everything.
# dplyr is included because it is attached with library(dplyr) further down.
required_pkgs <- c("httr", "jsonlite", "dplyr", "plyr")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

パッケージの読み込み

# Attach the packages used throughout the notebook.
# plyr is attached BEFORE dplyr on purpose: both packages export verbs with the
# same names (e.g. mutate, summarise, arrange), and the package attached last
# masks the other. The code in this notebook relies on dplyr (%>%, filter,
# as_tibble), so dplyr must come last.
library(httr)
library(jsonlite)
library(plyr)
library(dplyr)

Bearer Authentication

# Build the HTTP header used to authenticate the API requests below.
# The bearer token is read from the TWITTER_BEARER environment variable so the
# secret itself never appears in the notebook.
bearer_token <- Sys.getenv("TWITTER_BEARER")
if (!nzchar(bearer_token)) {
  # Sys.getenv() returns "" for an unset variable, which would otherwise
  # silently produce the header value "Bearer " with no token in it.
  warning(
    "Environment variable TWITTER_BEARER is empty; ",
    "requests will be sent without a bearer token.",
    call. = FALSE
  )
}
headers <- c(Authorization = sprintf("Bearer %s", bearer_token))

Get tweets with specific words

# Endpoint queried by every search request in this notebook.
RecentSearchURL <- "https://api.twitter.com/2/tweets/search/recent"

# Request parameters, sent as the URL query string:
#   query        - search expression (the "-is:retweet" part presumably
#                  excludes retweets; confirm against the API docs)
#   tweet.fields - extra per-tweet fields to include in the response
#   max_results  - number of tweets requested
params <- list(
  query = "(chatGPT OR OpenAI)(-is:retweet)",
  tweet.fields = "author_id,created_at,public_metrics",
  max_results = 20
)

# Send the GET request with the authorization header attached.
res <- httr::GET(
  url = RecentSearchURL,
  httr::add_headers(.headers = headers),
  query = params
)

# Print the response object for a quick look.
res

Review the response in the JSON format

# Pull the body out of the response object as one character string.
obj <- httr::content(x = res, as = "text")
# Uncomment to pretty-print the JSON text:
# prettify(obj)

Convert the JSON response to a data frame (with the pipe operator “%>%”)

# Parse the JSON text (flattening nested objects into columns) and coerce the
# parsed result to a data frame.
tweetData <- obj %>%
  fromJSON(flatten = TRUE) %>%
  as.data.frame()
# Equivalent two-step form without the pipe:
# tmp <- fromJSON(obj, flatten = TRUE)
# as.data.frame(tmp)

Review the response in a tibble

# Show the first rows of the result as a tibble.
tweetData %>%
  head() %>%
  as_tibble()

Get tweets with specific words & language

Specify the English language: “lang:en”

# Same search as before, with "lang:en" added to the query to request
# English-language tweets.
params <- list(
  query = "(chatGPT OR OpenAI)(lang:en)(-is:retweet)",
  tweet.fields = "author_id,created_at,public_metrics",
  max_results = 20
)

# Re-run the request; this overwrites the previous response held in `res`.
res <- httr::GET(
  url = RecentSearchURL,
  httr::add_headers(.headers = headers),
  query = params
)

Review the response

# Body of the latest response as a single JSON string.
obj <- httr::content(x = res, as = "text")

# JSON text -> parsed (flattened) object -> data frame.
tweetData <- obj %>%
  fromJSON(flatten = TRUE) %>%
  as.data.frame()

# Preview the first rows as a tibble.
tweetData %>%
  head() %>%
  as_tibble()

Specify the Japanese language: “lang:ja”

# Same search again, this time with "lang:ja" in the query to request
# Japanese-language tweets.
params <- list(
  query = "(chatGPT OR OpenAI)(lang:ja)(-is:retweet)",
  tweet.fields = "author_id,created_at,public_metrics",
  max_results = 20
)

# Re-run the request; this overwrites the previous response held in `res`.
res <- httr::GET(
  url = RecentSearchURL,
  httr::add_headers(.headers = headers),
  query = params
)

Review the response

# Body of the latest response as a single JSON string.
obj <- httr::content(x = res, as = "text")

# JSON text -> parsed (flattened) object -> data frame.
tweetData <- obj %>%
  fromJSON(flatten = TRUE) %>%
  as.data.frame()

# Preview the first rows as a tibble.
tweetData %>%
  head() %>%
  as_tibble()

Export the data

# Strip URLs (anything starting with "http" up to the next whitespace) from the
# tweet texts before exporting them.
# tweetText is not defined by any earlier step in this script, so it is derived
# here from the most recent search result held in tweetData.
tweetText <- gsub("http\\S+", "", tweetData$data.text)

# Write the cleaned texts to a plain-text file; this file is the input of the
# morphological analysis further down.
write(tweetText, "recent_tweets_ja.txt")

日本語テキスト処理

RMeCabの読み込み

# Load the MeCab shared library explicitly, then attach RMeCab.
# The library is looked up under the user's home directory
# ($HOME/usr/local/lib), i.e. a per-user installation prefix.
# NOTE(review): presumably RMeCab cannot find libmecab on its own in this
# environment, hence the explicit dyn.load() -- confirm.

# Read HOME straight from the environment instead of spawning a shell for it.
path_home <- Sys.getenv("HOME")
lib_path <- paste0(path_home, "/usr/local/lib/libmecab.so.2")
dyn.load(lib_path)
library(RMeCab)

形態素解析

# Run the morphological analysis on the exported tweet file and collect the
# extracted terms in a data frame.
# NOTE(review): type = 1 presumably selects morpheme (word) units rather than
# characters -- confirm against the RMeCab documentation.
tweetsDF <- RMeCab::docDF("recent_tweets_ja.txt", type = 1)
file_name =  ./recent_tweets_ja.txt opened
number of extracted terms = 282
now making a data frame. wait a while!
# Keep only content words:
#   POS1 (part of speech)     : noun, adjective, adverb
#   POS2 (part-of-speech type): general, proper noun, independent
sub_tweetsDF <- tweetsDF %>%
  filter(
    POS1 %in% c("名詞", "形容詞", "副詞"),
    POS2 %in% c("一般", "固有名詞", "自立")
  )

# Preview the first rows of the filtered terms.
head(sub_tweetsDF)

Wordcloud visualization

library(wordcloud2)

# Columns 1 and 4 are the ones handed to the word cloud.
# NOTE(review): presumably column 1 is the term and column 4 its frequency in
# the input file -- confirm against the docDF output.
sub_tweetsDF[, c(1, 4)]

# Draw the word cloud from those two columns.
wordcloud2(
  sub_tweetsDF[, c(1, 4)],
  size = 0.8,
  shape = "pentagon"
)

Specify datetime values

# English-language search restricted to a fixed time window via start_time and
# end_time.
# NOTE(review): the dates are hard-coded; the recent-search endpoint reportedly
# only covers roughly the last seven days, so they likely need updating before
# this chunk is re-run -- confirm against the API docs.
params <- list(
  query = "(chatGPT OR OpenAI)(lang:en)(-is:retweet)",
  tweet.fields = "author_id,created_at,public_metrics",
  start_time = "2023-01-08T00:00:00Z",
  end_time = "2023-01-09T00:00:00Z",
  max_results = 20
)

# Request -> body text -> parsed JSON -> data frame, as one pipeline.
# The result name is put first with `<-` rather than trailing with `->`, so it
# is visible at a glance.
tweetData2 <- httr::GET(
  url = RecentSearchURL,
  httr::add_headers(.headers = headers),
  query = params
) %>%
  httr::content(as = "text") %>%
  fromJSON(flatten = TRUE) %>%
  as.data.frame()

# Preview the first rows as a tibble.
as_tibble(head(tweetData2))
# Same English-language search for a second, two-day time window.
# NOTE(review): the dates are hard-coded; the recent-search endpoint reportedly
# only covers roughly the last seven days, so they likely need updating before
# this chunk is re-run -- confirm against the API docs.
params <- list(
  query = "(chatGPT OR OpenAI)(lang:en)(-is:retweet)",
  tweet.fields = "author_id,created_at,public_metrics",
  start_time = "2023-01-10T00:00:00Z",
  end_time = "2023-01-12T00:00:00Z",
  max_results = 20
)

# Request -> body text -> parsed JSON -> data frame, as one pipeline.
# The result name is put first with `<-` rather than trailing with `->`, so it
# is visible at a glance.
tweetData3 <- httr::GET(
  url = RecentSearchURL,
  httr::add_headers(.headers = headers),
  query = params
) %>%
  httr::content(as = "text") %>%
  fromJSON(flatten = TRUE) %>%
  as.data.frame()

# Preview the first rows as a tibble.
as_tibble(head(tweetData3))
library("wordcloud")
Loading required package: RColorBrewer
# colnames(tweetData2)

# Build a word-frequency table from the English tweet texts:
#   1. drop URLs,
#   2. replace newlines and the "&gt" entity prefix with a space -- replacing
#      them with "" would glue the words on either side into a single token,
#   3. split on whitespace and punctuation,
#   4. flatten to one vector, drop empty tokens and count occurrences.
wordLst2 <- tweetData2$data.text %>%
  gsub("http\\S+", "", .) %>%
  gsub("(\\n|&gt)", " ", .) %>%
  strsplit("[[:space:]]|[[:punct:]]") %>%
  unlist() %>%
  .[. != ""] %>%
  table()

# Draw the word cloud: the table names are the words, its values the counts.
wordcloud(names(wordLst2), wordLst2)

LS0tCnRpdGxlOiAiTGVjMTE6IFR3aXR0ZXIgQVBJIHYyIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIExlYzExCgojIyBUd2l0dGVyIEFQSeOBruWIqeeUqAojIyMgUmVmZXJlbmNlcworIDxhIGhyZWY9Imh0dHBzOi8vZGV2ZWxvcGVyLnR3aXR0ZXIuY29tL2VuIiB0YXJnZXQ9Il9ibGFuayI+RGV2ZWxvcGVyIFBsYXRmb3JtPC9hPgorIDxhIGhyZWY9Imh0dHBzOi8vZGV2ZWxvcGVyLnR3aXR0ZXIuY29tL2VuL2RvY3MvdHdpdHRlci1hcGkvZ2V0dGluZy1zdGFydGVkL2Fib3V0LXR3aXR0ZXItYXBpI2l0ZW0wIiB0YXJnZXQ9Il9ibGFuayI+RGV2ZWxvcGVyIFBsYXRmb3JtOiBEb2NzPC9hPgorIDxhIGhyZWY9Imh0dHBzOi8vcnB1YnMuY29tL2NzY2h3YXJ6L3NtYXBwTGFiMyIgdGFyZ2V0PSJfYmxhbmsiPkludHJvZHVjdGlvbiB0byBUd2l0dGVyIEFQSSB2MjwvYT4KKyA8YSBocmVmPSJodHRwczovL2ZyYWJhLmdpdGh1Yi5pby9yLWFjYWRlbWljLXR3aXR0ZXIvZmlyc3Qtc3RlcHMuaHRtbCNpbnRlcnJvZ2F0aW5nLXRoZS10d2l0dGVyLWFwaSIgdGFyZ2V0PSJfYmxhbmsiPkZyYW5jZXNjbyBCYWlsby4gMjAyMi4gIlR3aXR0ZXIgQVBJIEFjYWRlbWljIFJlc2VhcmNoIGFjY2VzcyB3aXRoIFIiPC9hPgorIDxhIGhyZWY9Imh0dHBzOi8vZ2l0aHViLmNvbS9NYWVsS3VibGkvUlR3aXR0ZXJWMiIgdGFyZ2V0PSJfYmxhbmsiPlJUd2l0dGVyVjI8L2E+CgojIyMgSW5zdGFsbCBwYWNrYWdlczogaHR0ciwganNvbmxpdGUKYGBge3IsIGV2YWw9RkFMU0V9Cmluc3RhbGwucGFja2FnZXMoImh0dHIiKQppbnN0YWxsLnBhY2thZ2VzKCJqc29ubGl0ZSIpCiNpbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpCmluc3RhbGwucGFja2FnZXMoInBseXIiKQpgYGAKCiMjIyDjg5Hjg4PjgrHjg7zjgrjjga7oqq3jgb/ovrzjgb8KYGBge3J9CmxpYnJhcnkoaHR0cikKbGlicmFyeShqc29ubGl0ZSkKbGlicmFyeShkcGx5cikKbGlicmFyeShwbHlyKQpgYGAKCiMjIyBCZWFyZXIgQXV0aGVudGljYXRpb24KYGBge3J9CmhlYWRlcnMgPC0gYyhgQXV0aG9yaXphdGlvbmAgPSBzcHJpbnRmKCdCZWFyZXIgJXMnLCAgU3lzLmdldGVudigiVFdJVFRFUl9CRUFSRVIiKSkpCmBgYAoKIyMjIEdldCB0d2VldHMgd2l0aCBzcGVjaWZpYyB3b3JkcyAKYGBge3J9ClJlY2VudFNlYXJjaFVSTCA8LSAnaHR0cHM6Ly9hcGkudHdpdHRlci5jb20vMi90d2VldHMvc2VhcmNoL3JlY2VudCcKcGFyYW1zIDwtIGxpc3QocXVlcnkgPSAiKGNoYXRHUFQgT1IgT3BlbkFJKSgtaXM6cmV0d2VldCkiLCB0d2VldC5maWVsZHMgPSAiYXV0aG9yX2lkLGNyZWF0ZWRfYXQscHVibGljX21ldHJpY3MiLCBtYXhfcmVzdWx0cyA9IDIwKQpyZXMgPC0gaHR0cjo6R0VUKHVybCA9IFJlY2VudFNlYXJjaFVSTCwgaHR0cjo6YWRkX2hlYWRlcnMoLmhlYWRlcnMgPSBoZWFkZXJzKSwgcXVlcnkgPSBwYXJhbXMpCmBgYAoKCmBgYHtyLCBldmFsPUZBTFNFfQpyZXMKYGBgCgojIyMgUmV2aWV3IHRoZSBy
ZXNwb25zZSBpbiB0aGUgSlNPTiBmb3JtYXQKYGBge3IsIGV2YWw9RkFMU0V9Cm9iaiA8LSBodHRyOjpjb250ZW50KHJlcywgYXMgPSAidGV4dCIpCiNwcmV0dGlmeShvYmopCmBgYAoKIyMjIFJldmlldyB0aGUgcmVzcG9uc2UgaW4gdGhlIEpTT04gZm9ybWF0CisgPGEgaHJlZj0iaHR0cHM6Ly9xaWl0YS5jb20vUXVhbnRhcy9pdGVtcy8xYTIxMDczNDFiMTQ3NmNlNjA0NCIgdGFyZ2V0PSJfYmxhbmsiPnBpcGVzOiAiJT4lIjwvYT4KYGBge3J9CnR3ZWV0RGF0YSA8LSBmcm9tSlNPTihvYmosIGZsYXR0ZW4gPSBUUlVFKSAlPiUgYXMuZGF0YS5mcmFtZSgpCiN0bXAgPC0gZnJvbUpTT04ob2JqLCBmbGF0dGVuID0gVFJVRSkgCiNhcy5kYXRhLmZyYW1lKHRtcCkKYGBgCgojIyMgUmV2aWV3IHRoZSByZXNwb25zZSBpbiBhIDxhIGhyZWY9Imh0dHBzOi8vdGliYmxlLnRpZHl2ZXJzZS5vcmcvIiB0YXJnZXQ9Il9ibGFuayI+dGliYmxlPC9hPgpgYGB7cn0KYXNfdGliYmxlKGhlYWQodHdlZXREYXRhKSkKYGBgCgojIyMgR2V0IHR3ZWV0cyB3aXRoIHNwZWNpZmljIHdvcmRzICYgbGFuZ3VhZ2UKIyMjIyBTcGVjaWZ5IEVuZ2xpc2ggbGFuZ2FnZTogImxhbmc6ZW4iCmBgYHtyfQpwYXJhbXMgPC0gbGlzdChxdWVyeSA9ICIoY2hhdEdQVCBPUiBPcGVuQUkpKGxhbmc6ZW4pKC1pczpyZXR3ZWV0KSIsIHR3ZWV0LmZpZWxkcyA9ICJhdXRob3JfaWQsY3JlYXRlZF9hdCxwdWJsaWNfbWV0cmljcyIsIG1heF9yZXN1bHRzID0gMjApCnJlcyA8LSBodHRyOjpHRVQodXJsID0gUmVjZW50U2VhcmNoVVJMLCBodHRyOjphZGRfaGVhZGVycyguaGVhZGVycyA9IGhlYWRlcnMpLCBxdWVyeSA9IHBhcmFtcykKYGBgCgojIyMjIFJldmlldyB0aGUgcmVzcG9uc2UKYGBge3J9Cm9iaiA8LSBodHRyOjpjb250ZW50KHJlcywgYXMgPSAidGV4dCIpCnR3ZWV0RGF0YSA8LSBmcm9tSlNPTihvYmosIGZsYXR0ZW4gPSBUUlVFKSAlPiUgYXMuZGF0YS5mcmFtZSgpCmFzX3RpYmJsZShoZWFkKHR3ZWV0RGF0YSkpCmBgYAoKIyMjIyBTcGVjaWZ5IEVuZ2xpc2ggbGFuZ2FnZTogImxhbmc6amEiCmBgYHtyfQpwYXJhbXMgPC0gbGlzdChxdWVyeSA9ICIoY2hhdEdQVCBPUiBPcGVuQUkpKGxhbmc6amEpKC1pczpyZXR3ZWV0KSIsIHR3ZWV0LmZpZWxkcyA9ICJhdXRob3JfaWQsY3JlYXRlZF9hdCxwdWJsaWNfbWV0cmljcyIsIG1heF9yZXN1bHRzID0gMjApCnJlcyA8LSBodHRyOjpHRVQodXJsID0gUmVjZW50U2VhcmNoVVJMLCBodHRyOjphZGRfaGVhZGVycyguaGVhZGVycyA9IGhlYWRlcnMpLCBxdWVyeSA9IHBhcmFtcykKYGBgCgojIyMjIFJldmlldyB0aGUgcmVzcG9uc2UKYGBge3J9Cm9iaiA8LSBodHRyOjpjb250ZW50KHJlcywgYXMgPSAidGV4dCIpCnR3ZWV0RGF0YSA8LSBmcm9tSlNPTihvYmosIGZsYXR0ZW4gPSBUUlVFKSAlPiUgYXMuZGF0YS5mcmFtZSgpCmFzX3RpYmJsZShoZWFkKHR3ZWV0RGF0YSkpCmBgYAoKIyMjIFJlbW92ZSAidC5jbyIgbGlua3MgaW4gdGhlICJk
YXRhLnRleHQiIGNvbHVtbgpgYGB7cn0KdHdlZXRUZXh0IDwtIGdzdWIoImh0dHBcXFMrIiwgIiIsIHR3ZWV0RGF0YSRkYXRhLnRleHQpCmBgYAoKIyMjIEV4cG9ydCB0aGUgZGF0YSAKYGBge3IsIGV2YWw9RkFMU0V9CndyaXRlKHR3ZWV0VGV4dCwgInJlY2VudF90d2VldHNfamEudHh0IikKYGBgCgoKIyMg5pel5pys6Kqe44OG44Kt44K544OI5Yem55CGCiMjIyBSTWVDYWLjga7oqq3jgb/ovrzjgb8KYGBge3J9CnBhdGhfaG9tZSA8LSBzeXN0ZW0oImVjaG8gJEhPTUUiLGludGVybj1UKQpsaWJfcGF0aCA8LSBwYXN0ZShwYXRoX2hvbWUsIi91c3IvbG9jYWwvbGliL2xpYm1lY2FiLnNvLjIiLCBzZXA9IiIpCmR5bi5sb2FkKGxpYl9wYXRoKQpsaWJyYXJ5KFJNZUNhYikKYGBgCgojIyMg5b2i5oWL57Sg6Kej5p6QCmBgYHtyfQp0d2VldHNERiA8LSBkb2NERigicmVjZW50X3R3ZWV0c19qYS50eHQiLCB0eXBlPTEpCnN1Yl90d2VldHNERiA8LSB0d2VldHNERiAlPiUgZmlsdGVyKFBPUzEgJWluJSBjKCLlkI3oqZ4iLCAi5b2i5a656KmeIiwgIuWJr+ipniIpLCBQT1MyICVpbiUgYygi5LiA6IisIiwgIuWbuuacieWQjeipniIsIuiHqueriyIpKQpoZWFkKHN1Yl90d2VldHNERikKYGBgCgojIyMgV29yZGNsb3VkIHZpc3VhbGl6YXRpb24KYGBge3J9CmxpYnJhcnkod29yZGNsb3VkMikKc3ViX3R3ZWV0c0RGWyxjKDEsNCldCndvcmRjbG91ZDIoc3ViX3R3ZWV0c0RGWyxjKDEsNCldLCBzaXplID0gMC44LCBzaGFwZSA9ICdwZW50YWdvbicpCmBgYAoKIyMjIyBTcGVjaWZ5IDxhIGhyZWY9Imh0dHBzOi8vZGV2ZWxvcGVyLnR3aXR0ZXIuY29tL2VuL2RvY3MvdHdpdHRlci1hZHMtYXBpL3RpbWV6b25lcyIgdGFyZ2V0PSJfYmxhbmsiPnBpcGVzOiAiJT5kYXRldGltZSB2YWx1ZXM8L2E+CmBgYHtyfQpwYXJhbXMgPC0gbGlzdChxdWVyeSA9ICIoY2hhdEdQVCBPUiBPcGVuQUkpKGxhbmc6ZW4pKC1pczpyZXR3ZWV0KSIsIHR3ZWV0LmZpZWxkcyA9ICJhdXRob3JfaWQsY3JlYXRlZF9hdCxwdWJsaWNfbWV0cmljcyIsIHN0YXJ0X3RpbWUgPSAiMjAyMy0wMS0wOFQwMDowMDowMFoiLCBlbmRfdGltZSA9ICIyMDIzLTAxLTA5VDAwOjAwOjAwWiIsIG1heF9yZXN1bHRzID0gMjApCgpodHRyOjpHRVQodXJsID0gUmVjZW50U2VhcmNoVVJMLCBodHRyOjphZGRfaGVhZGVycyguaGVhZGVycyA9IGhlYWRlcnMpLCBxdWVyeSA9IHBhcmFtcykgJT4lIGh0dHI6OmNvbnRlbnQoLiwgYXMgPSAidGV4dCIpICU+JSBmcm9tSlNPTiguLCBmbGF0dGVuID0gVFJVRSkgICU+JSBhcy5kYXRhLmZyYW1lKCkgLT4gdHdlZXREYXRhMiAKYXNfdGliYmxlKGhlYWQodHdlZXREYXRhMikpCmBgYAoKYGBge3J9CnBhcmFtcyA8LSBsaXN0KHF1ZXJ5ID0gIihjaGF0R1BUIE9SIE9wZW5BSSkobGFuZzplbikoLWlzOnJldHdlZXQpIiwgdHdlZXQuZmllbGRzID0gImF1dGhvcl9pZCxjcmVhdGVkX2F0LHB1YmxpY19tZXRyaWNzIiwgc3RhcnRfdGltZSA9ICIyMDIzLTAxLTEwVDAwOjAw
OjAwWiIsIGVuZF90aW1lID0gIjIwMjMtMDEtMTJUMDA6MDA6MDBaIiwgbWF4X3Jlc3VsdHMgPSAyMCkKCmh0dHI6OkdFVCh1cmwgPSBSZWNlbnRTZWFyY2hVUkwsIGh0dHI6OmFkZF9oZWFkZXJzKC5oZWFkZXJzID0gaGVhZGVycyksIHF1ZXJ5ID0gcGFyYW1zKSAlPiUgaHR0cjo6Y29udGVudCguLCBhcyA9ICJ0ZXh0IikgJT4lIGZyb21KU09OKC4sIGZsYXR0ZW4gPSBUUlVFKSAgJT4lIGFzLmRhdGEuZnJhbWUoKSAtPiB0d2VldERhdGEzIAphc190aWJibGUoaGVhZCh0d2VldERhdGEzKSkKYGBgCgpgYGB7cn0KbGlicmFyeSgid29yZGNsb3VkIikKYGBgCgpgYGB7cn0KI2NvbG5hbWVzKHR3ZWV0RGF0YTIpCmdzdWIoImh0dHBcXFMrIiwgIiIsIHR3ZWV0RGF0YTIkZGF0YS50ZXh0KSAlPiUgZ3N1YigiKFxcbnwmZ3QpIiwgIiIsIC4pICU+JSBzdHJzcGxpdCguLCJbWzpzcGFjZTpdXXxbWzpwdW5jdDpdXSIpICU+JSB1bmxpc3QgJT4lIC5bLiAhPSAiIl0gICU+JSB0YWJsZSAtPiB3b3JkTHN0MgpgYGAKCmBgYHtyfQp3b3JkY2xvdWQobmFtZXMod29yZExzdDIpLCB3b3JkTHN0MikKYGBgCiMjIyDmhJ/mg4XliIbmnpAKKyA8YSBocmVmPSJodHRwczovL3d3dy50aWR5dGV4dG1pbmluZy5jb20vc2VudGltZW50Lmh0bWwiIHRhcmdldD0iX2JsYW5rIj5KdWxpYSBTaWxnZSAmIERhdmlkIFJvYmluc29uICgyMDE3KSAiVGV4dCBNaW5pbmcgd2l0aCBSIiwgTydSZWlsbHk8L2E+CisgPGEgaHJlZj0iaHR0cHM6Ly93d3cuY2wuZWNlaS50b2hva3UuYWMuanAvT3Blbl9SZXNvdXJjZXMtSmFwYW5lc2VfU2VudGltZW50X1BvbGFyaXR5X0RpY3Rpb25hcnkuaHRtbCIgdGFyZ2V0PSJfYmxhbmsiPuaXpeacrOiqnuipleS+oealteaAp+i+nuabuDwvYT4KCgoKCgo=