Extracting tweets from Twitter using the twitteR package.
twitteR is an R package which provides access to the Twitter API. Most functionality of the API is supported, with a bias towards API calls that are more useful in data analysis as opposed to daily interaction.
ggplot2
ggplot2 is an R package for data exploration and producing plots. It produces fantastic-looking graphics and allows one to slice and dice one’s data in many different ways.
tm
The tm package offers functionality for managing text documents, abstracts the process of document manipulation and eases the usage of heterogeneous text formats in R.
wordcloud
This package is used to display the accounts that tweet about the specific search term. It provides functionality to create pretty word clouds, visualize differences and similarity between documents, and avoid over-plotting in scatter plots with text.
library(twitteR)
set the credentials
# Credentials are read from environment variables (for example set in
# ~/.Renviron) so that API secrets never live in the script itself.
# SECURITY: the keys that used to be hard-coded here have been exposed and
# should be revoked and regenerated in the Twitter developer console.
# NOTE(review): the former ACCESS_SECRET / ACCESS_TOKEN variables appeared to be
# named the wrong way round (and were passed swapped to compensate); the names
# below match their contents.
CONSUMER_KEY <- Sys.getenv("TWITTER_CONSUMER_KEY")
CONSUMER_SECRET <- Sys.getenv("TWITTER_CONSUMER_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")
ACCESS_SECRET <- Sys.getenv("TWITTER_ACCESS_SECRET")
# Fail early with a clear message instead of an opaque authentication error.
if (!all(nzchar(c(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_SECRET)))) {
  stop(
    "Set TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, ",
    "TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_SECRET before running.",
    call. = FALSE
  )
}
connect to twitter app
# Arguments are named so the token and secret cannot be transposed.
setup_twitter_oauth(
  consumer_key = CONSUMER_KEY,
  consumer_secret = CONSUMER_SECRET,
  access_token = ACCESS_TOKEN,
  access_secret = ACCESS_SECRET
)
## [1] "Using direct authentication"
set twitter user
# Fetch the account object for the screen name, then inspect its class and
# its full structure.
twitterUser <- getUser("weareoneEXO")
class(twitterUser)
## [1] "user"
## attr(,"package")
## [1] "twitteR"
str(twitterUser)
## Reference class 'user' [package "twitteR"] with 18 fields
## $ description : chr "EXO (<U+C5D1><U+C18C>) Official We are ONE<U+0001F44D>EXO <U+C0AC><U+B791><U+D558><U+C790>!"
## $ statusesCount : num 3067
## $ followersCount : num 11116141
## $ favoritesCount : num 166
## $ friendsCount : num 4
## $ url : chr "https://t.co/qbCx4jCmGW"
## $ name : chr "EXO"
## $ created : POSIXct[1:1], format: "2017-06-09 09:51:57"
## $ protected : logi FALSE
## $ verified : logi TRUE
## $ screenName : chr "weareoneEXO"
## $ location : chr ""
## $ lang : chr(0)
## $ id : chr "873115441303924736"
## $ lastStatus :Reference class 'status' [package "twitteR"] with 17 fields
## ..$ text : chr "<U+203B> <U+D065><U+D29C><U+BE0C><U+B294> <U+B9E4><U+C6D4> 17<U+C77C> <U+C624><U+D6C4> 7<U+C2DC> <U+C5C5><U+B85"| __truncated__
## ..$ favorited : logi FALSE
## ..$ favoriteCount: num 36862
## ..$ replyToSN : chr "weareoneEXO"
## ..$ created : POSIXct[1:1], format: "2021-05-13 10:02:48"
## ..$ truncated : logi TRUE
## ..$ replyToSID : chr "1392782370793615364"
## ..$ id : chr "1392782372391718916"
## ..$ replyToUID : chr "873115441303924736"
## ..$ statusSource : chr "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>"
## ..$ screenName : chr "Unknown"
## ..$ retweetCount : num 14937
## ..$ isRetweet : logi FALSE
## ..$ retweeted : logi FALSE
## ..$ longitude : chr(0)
## ..$ latitude : chr(0)
## ..$ urls :'data.frame': 1 obs. of 5 variables:
## .. ..$ url : chr "https://t.co/6Jl1tra543"
## .. ..$ expanded_url: chr "https://twitter.com/i/web/status/1392782372391718916"
## .. ..$ display_url : chr "twitter.com/i/web/status/1…"
## .. ..$ start_index : num 117
## .. ..$ stop_index : num 140
## ..and 53 methods, of which 39 are possibly relevant:
## .. getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
## .. getLatitude, getLongitude, getReplyToSID, getReplyToSN, getReplyToUID,
## .. getRetweetCount, getRetweeted, getRetweeters, getRetweets, getScreenName,
## .. getStatusSource, getText, getTruncated, getUrls, initialize, setCreated,
## .. setFavoriteCount, setFavorited, setId, setIsRetweet, setLatitude,
## .. setLongitude, setReplyToSID, setReplyToSN, setReplyToUID,
## .. setRetweetCount, setRetweeted, setScreenName, setStatusSource, setText,
## .. setTruncated, setUrls, toDataFrame, toDataFrame#twitterObj
## $ listedCount : num 13730
## $ followRequestSent: logi FALSE
## $ profileImageUrl : chr "http://pbs.twimg.com/profile_images/1391770003339300864/qJWPFDbj_normal.jpg"
## and 59 methods, of which 45 are possibly relevant:
## getCreated, getDescription, getFavorites, getFavoritesCount,
## getFavouritesCount, getFollowerIDs, getFollowers, getFollowersCount,
## getFollowRequestSent, getFriendIDs, getFriends, getFriendsCount, getId,
## getLang, getLastStatus, getListedCount, getLocation, getName,
## getProfileImageUrl, getProtected, getScreenName, getStatusesCount, getUrl,
## getVerified, initialize, setCreated, setDescription, setFavoritesCount,
## setFollowersCount, setFollowRequestSent, setFriendsCount, setId, setLang,
## setLastStatus, setListedCount, setLocation, setName, setProfileImageUrl,
## setProtected, setScreenName, setStatusesCount, setUrl, setVerified,
## toDataFrame, toDataFrame#twitterObj
Extract tweets from user’s timeline
# Three variants of the same request; each call overwrites `tweets`, so only
# the last result (up to n = 3200 statuses, retweets included) is kept.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, and the stray trailing comma of the last call is removed.
tweets <- userTimeline(twitterUser, n = 10)
tweets <- userTimeline(twitterUser, n = 10, includeRts = TRUE)
tweets <- userTimeline(twitterUser, n = 3200, includeRts = TRUE)
Display the attributes and methods of a tweet object
# Print the reference-class definition (fields and methods) of the first tweet.
tweets[[1]]$getClass()
## Reference Class "status":
##
## Class fields:
##
## Name: text favorited favoriteCount replyToSN created
## Class: character logical numeric character POSIXct
##
## Name: truncated replyToSID id replyToUID statusSource
## Class: logical character character character character
##
## Name: screenName retweetCount isRetweet retweeted longitude
## Class: character numeric logical logical character
##
## Name: latitude urls
## Class: character data.frame
##
## Class Methods:
## "setUrls", "getRetweets", "getRefClass", "getUrls", "setTruncated",
## "setText", "getReplyToSID", "getText", "export", "setCreated",
## "setFavoriteCount", "getCreated", "initialize", "callSuper",
## "getRetweeters", "initFields", "getClass", "setReplyToUID", "import",
## "setLatitude", "setIsRetweet", "getFavoriteCount", "getRetweetCount",
## "getIsRetweet", "setId", "setScreenName", "getLatitude", "getScreenName",
## "toDataFrame#twitterObj", "setRetweetCount", "setReplyToSID", "getId",
## "getReplyToUID", "setFavorited", "getRetweeted", "getFavorited",
## "toDataFrame", "setStatusSource", "setReplyToSN", "copy", "usingMethods",
## "setRetweeted", "field", ".objectParent", "getTruncated", "untrace",
## "trace", "setLongitude", "getLongitude", "getStatusSource",
## ".objectPackage", "getReplyToSN", "show"
##
## Reference Superclasses:
## "twitterObj", "envRefClass"
# Class of a single element of the timeline list (closing parenthesis restored).
class(tweets[[1]])
Display the main body (text) of tweets 1 to 5
# `tweets` is a list of status objects, so `tweets$text` is NULL and indexing
# it returned NULL. Extract the text field from each of the first five tweets
# instead; head() copes with a timeline shorter than five.
vapply(head(tweets, 5), function(tweet) tweet$text, character(1))
display main body text of tweet
# Same fields as before, read through the status object's accessor methods.
tweets[[5]]$getText()
## [1] "<U+0001F4BF><U+0001F6D2> \n\n[Photo Book Ver.2]\n\nYES24<U+27AB> https://t.co/yrhKdDA5Yn \nAladin<U+27AB> https://t.co/h2hl0onhXY \nHottracks<U+27AB>… https://t.co/aYnDHXhZ4P"
display favorite count
tweets[[1]]$getFavoriteCount()
## [1] 36862
display retweet count
tweets[[1]]$getRetweetCount()
## [1] 14937
check current rate limit (Check /statuses/user_timeline)
getCurRateLimitInfo()
extract tweets based on a search term
# Search for up to 5000 English-language (lang = "en") tweets matching the
# hashtag, then check the container type and preview the first results.
searchTerm <- "#EXO"
trendingTweets <- searchTwitter(searchTerm, n = 5000, lang = "en")
class(trendingTweets)
## [1] "list"
head(trendingTweets)
## [[1]]
## [1] "sundaemorningss: @PhixoBar @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #<U+C2DC><U+C6B0><U+BBFC>\n#EXO #<U+C5D1><U+C18C> \n@weareoneEXO"
##
## [[2]]
## [1] "CalliopeCiello: RT @exoluniverseph: EXO-Ls, Please help us win some funds for @weareoneEXO's Comeback <U+0001F64F><U+0001F3FC>\n\nPlease rt and like our tweet and make sure to fol…"
##
## [[3]]
## [1] "dviviofdsehun: RT @CITYLIGHTS_LIT: @dviviofdsehun @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #<U+C2DC><U+C6B0><U+BBFC>\n#EXO #<U+C5D1><U+C18C> \n@weareoneEXO"
##
## [[4]]
## [1] "fffirelight: RT @PHIXOT9: Eris! We got this deal! Let us unite to win this another fight <U+2764><U+FE0F> Like and rt below!! Let us do this for our boys we can do it…"
##
## [[5]]
## [1] "xmyeonmin: @aerieya @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #<U+C2DC><U+C6B0><U+BBFC>\n#EXO #<U+C5D1><U+C18C> \n@weareoneEXO"
##
## [[6]]
## [1] "12IRBLDR13: RT @exoluniverseph: EXO-Ls, Please help us win some funds for @weareoneEXO's Comeback <U+0001F64F><U+0001F3FC>\n\nPlease rt and like our tweet and make sure to fol…"
# Show the structure (fields and available methods) of the first search result.
str(trendingTweets[[1]])
## Reference class 'status' [package "twitteR"] with 17 fields
## $ text : chr "@PhixoBar @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #<U+C2DC><U+C6B0><U+BBFC>\n#EXO #<U+C5D1><U+C18C> \n@weareoneEXO"
## $ favorited : logi FALSE
## $ favoriteCount: num 0
## $ replyToSN : chr "PhixoBar"
## $ created : POSIXct[1:1], format: "2021-05-14 02:06:32"
## $ truncated : logi FALSE
## $ replyToSID : chr "1393024821823344640"
## $ id : chr "1393024901301235714"
## $ replyToUID : chr "1369318888538771457"
## $ statusSource : chr "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>"
## $ screenName : chr "sundaemorningss"
## $ retweetCount : num 0
## $ isRetweet : logi FALSE
## $ retweeted : logi FALSE
## $ longitude : chr(0)
## $ latitude : chr(0)
## $ urls :'data.frame': 0 obs. of 4 variables:
## ..$ url : chr(0)
## ..$ expanded_url: chr(0)
## ..$ dispaly_url : chr(0)
## ..$ indices : num(0)
## and 53 methods, of which 39 are possibly relevant:
## getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
## getLatitude, getLongitude, getReplyToSID, getReplyToSN, getReplyToUID,
## getRetweetCount, getRetweeted, getRetweeters, getRetweets, getScreenName,
## getStatusSource, getText, getTruncated, getUrls, initialize, setCreated,
## setFavoriteCount, setFavorited, setId, setIsRetweet, setLatitude,
## setLongitude, setReplyToSID, setReplyToSN, setReplyToUID, setRetweetCount,
## setRetweeted, setScreenName, setStatusSource, setText, setTruncated,
## setUrls, toDataFrame, toDataFrame#twitterObj
Convert the trending tweets list into the data frame trendingTweets.df
# Flatten the list of status objects into a data frame.
trendingTweets.df <- twListToDF(trendingTweets)
# Normalise the tweet text to UTF-8. enc2utf8() is vectorised, respects each
# string's declared encoding and is safe to apply more than once, unlike
# iconv(x, to = "UTF-8"), which re-decodes from the native locale and, wrapped
# in sapply(), also returned a vector named by the tweet text.
trendingTweets.df$text <- enc2utf8(as.character(trendingTweets.df$text))
# Persist the data frame so the search does not have to be repeated.
save(trendingTweets.df, file = "trendingTweets202105.Rda")
trendingTweets.df$text[[1]]
## [1] "@PhixoBar @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #시우민\n#EXO #엑소 \n@weareoneEXO"
# Packages used for the text-mining and plotting steps. Each package is loaded
# once; the stray "1." / "2." list numbers that broke parsing are removed.
library(twitteR)
library(tm)
## Loading required package: NLP
library(stringr)
library(wordcloud)
## Loading required package: RColorBrewer
library(syuzhet)
library(RColorBrewer)
library(ggplot2)
# Accounts which tweet about EXO: build a corpus from the screen names and
# check the class of each intermediate object.
namesCorpus <- Corpus(VectorSource(trendingTweets.df[["screenName"]]))
class(trendingTweets.df[["screenName"]])
## [1] "character"
class(VectorSource(trendingTweets.df[["screenName"]]))
## [1] "VectorSource" "SimpleSource" "Source"
class(namesCorpus)
## [1] "SimpleCorpus" "Corpus"
# Make sure the text column is UTF-8 without converting it a second time.
# iconv(x, to = "UTF-8") ignores the declared encoding and re-decodes from the
# native locale, which double-encodes text that is already UTF-8 (mojibake);
# enc2utf8() leaves UTF-8 strings untouched.
trendingTweets.df$text <- enc2utf8(as.character(trendingTweets.df$text))
head(trendingTweets.df$text)
## [1] "@PhixoBar @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #ìâ\200¹Å“우민\n#EXO #ìâ\200”â\200\230ìâ\200 Å’ \n@weareoneEXO"
## [2] "RT @exoluniverseph: EXO-Ls, Please help us win some funds for @weareoneEXO's Comeback ðŸ™Â\217ðŸÂ\217¼\n\nPlease rt and like our tweet and make sure to fol…"
## [3] "RT @CITYLIGHTS_LIT: @dviviofdsehun @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #ìâ\200¹Å“우민\n#EXO #ìâ\200”â\200\230ìâ\200 Å’ \n@weareoneEXO"
## [4] "RT @PHIXOT9: Eris! We got this deal! Let us unite to win this another fight âÂ\235¤ï¸Â\217 Like and rt below!! Let us do this for our boys we can do it…"
## [5] "@aerieya @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #ìâ\200¹Å“우민\n#EXO #ìâ\200”â\200\230ìâ\200 Å’ \n@weareoneEXO"
## [6] "RT @exoluniverseph: EXO-Ls, Please help us win some funds for @weareoneEXO's Comeback ðŸ™Â\217ðŸÂ\217¼\n\nPlease rt and like our tweet and make sure to fol…"
#Cleaning the data
# Keep the tweets in a data frame so the cleaned text can be stored in its own
# column; assigning `$cleanedText` on a bare character vector silently coerced
# it to a list. `text` holds the tweets with @handles blanked out.
nohandles <- data.frame(
  text = str_replace_all(trendingTweets.df$text, "@\\w+", " "),
  stringsAsFactors = FALSE
)
# "http\\S*" removes only the link itself (http and https alike). The previous
# "http.*" pattern also threw away everything that followed the first link,
# and the separate "https.*" pass could never match after it.
nohandles$cleanedText <- gsub("http\\S*", " ", nohandles$text)
head(nohandles$cleanedText)
## [1] " ACTOR XIUMIN IS BACK\n\n#XIUMIN #ìâ\200¹Å“우민\n#EXO #ìâ\200”â\200\230ìâ\200 Å’ \n "
## [2] "RT : EXO-Ls, Please help us win some funds for 's Comeback ðŸ™Â\217ðŸÂ\217¼\n\nPlease rt and like our tweet and make sure to fol…"
## [3] "RT : ACTOR XIUMIN IS BACK\n\n#XIUMIN #ìâ\200¹Å“우민\n#EXO #ìâ\200”â\200\230ìâ\200 Å’ \n "
## [4] "RT : Eris! We got this deal! Let us unite to win this another fight âÂ\235¤ï¸Â\217 Like and rt below!! Let us do this for our boys we can do it…"
## [5] " ACTOR XIUMIN IS BACK\n\n#XIUMIN #ìâ\200¹Å“우민\n#EXO #ìâ\200”â\200\230ìâ\200 Å’ \n "
## [6] "RT : EXO-Ls, Please help us win some funds for 's Comeback ðŸ™Â\217ðŸÂ\217¼\n\nPlease rt and like our tweet and make sure to fol…"
# Two replacement passes applied in sequence: first every non-alphanumeric
# character becomes a space, then everything outside a-z, A-Z and 0-9 does.
nohandles$cleanedText <- str_replace_all(
  str_replace_all(nohandles$cleanedText, "[^[:alnum:]]", " "),
  "[[^a-zA-Z0-9]]",
  " "
)
head(nohandles$cleanedText)
## [1] " ACTOR XIUMIN IS BACK XIUMIN EXO "
## [2] "RT EXO Ls Please help us win some funds for s Comeback Please rt and like our tweet and make sure to fol "
## [3] "RT ACTOR XIUMIN IS BACK XIUMIN EXO "
## [4] "RT Eris We got this deal Let us unite to win this another fight Like and rt below Let us do this for our boys we can do it "
## [5] " ACTOR XIUMIN IS BACK XIUMIN EXO "
## [6] "RT EXO Ls Please help us win some funds for s Comeback Please rt and like our tweet and make sure to fol "
# Build the word corpus from the cleaned tweet text and show the first
# document's content.
wordCorpus <- Corpus(VectorSource(nohandles[["cleanedText"]]))
wordCorpus[[1]]$content
## [1] " ACTOR XIUMIN IS BACK XIUMIN EXO "
#removing punctuations
# Strip punctuation and digits, then lower-case every document. The first step
# previously tried to call the corpus returned by tm_map() as a function,
# which is an error; the stray trailing call is removed.
wordCorpus <- tm_map(wordCorpus, removePunctuation)
wordCorpus <- tm_map(wordCorpus, removeNumbers)
# tolower is a base function, so it is wrapped with content_transformer().
wordCorpus <- tm_map(wordCorpus, content_transformer(tolower))
wordCorpus[[1]]$content
#removing words such as "a", "an", "the", "is", etc
# Remove the "english" stop-word list, then the "SMART" list. The list names
# are quoted with plain double quotes: the typographic quotes used before are
# not valid R string delimiters.
wordCorpus <- tm_map(wordCorpus, removeWords, stopwords("english"))
wordCorpus <- tm_map(wordCorpus, removeWords, stopwords("SMART"))
wordCorpus[[1]]$content
#manual removing of words that cannot be read
# Remove the token "amp" from every document (presumably the remnant of an
# HTML "&amp;" entity after punctuation was stripped; confirm).
wordCorpus <- tm_map(wordCorpus, removeWords, c("amp"))
## Warning in tm_map.SimpleCorpus(wordCorpus, removeWords, c("amp")):
## transformation drops documents
# Remove the token "ste" as well; it was picked by hand.
wordCorpus <- tm_map(wordCorpus, removeWords, c("ste"))#manual assignment
## Warning in tm_map.SimpleCorpus(wordCorpus, removeWords, c("ste")):
## transformation drops documents
# Spot-check the fifth document.
# NOTE(review): the recorded output below is still upper-case even though
# tolower was applied earlier; it looks stale, so re-run to confirm.
wordCorpus[[5]]$content
## [1] " ACTOR XIUMIN IS BACK XIUMIN EXO "
# Collapse runs of whitespace left behind by the removals above.
wordCorpus <- tm_map(wordCorpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(wordCorpus, stripWhitespace): transformation
## drops documents
wordCorpus[[1]]$content
## [1] " ACTOR XIUMIN IS BACK XIUMIN EXO "
# Inspect the internal structure of the finished corpus.
str(wordCorpus)
library(wordcloud)

# Nine-colour "YlGnBu" palette with its first four colours dropped.
# NOTE(review): `pal` is not passed to wordcloud() below, which uses "Dark2".
pal <- brewer.pal(9, "YlGnBu")[-(1:4)]

# Fix the random seed before drawing, and use a single plotting panel.
set.seed(123)
par(mfrow = c(1, 1))

# Draw the word cloud from the cleaned corpus.
wordcloud(
  words = wordCorpus,
  scale = c(5, 0.3),
  max.words = 500,
  random.order = FALSE,
  rot.per = 0.4,
  use.r.layout = FALSE,
  font.main = 5,
  cex.main = 1.5,
  colors = brewer.pal(8, "Dark2")
)