Extracted tweets

Extracting tweets from Twitter using the twitteR package.

Types of Packages I used

twitteR

is an R package which provides access to the Twitter API. Most functionality of the API is supported, with a bias towards API calls that are more useful in data analysis as opposed to daily interaction.

ggplot2

is an R package for data exploration and producing plots. It produces fantastic-looking graphics and allows one to slice and dice one’s data in many different ways.

tm

The tm package offers functionality for managing text documents, abstracts the process of document manipulation and eases the usage of heterogeneous text formats in R.

wordcloud

This was used to display the accounts that tweet about the specific search term. It provides functionality to create pretty word clouds, visualize differences and similarity between documents, and avoid over-plotting in scatter plots with text.

library(twitteR)

set the credentials

# Twitter API credentials.
# Secrets must not live in source: each value is read from an environment
# variable (for example set in ~/.Renviron) named after the R variable it
# fills. The keys that were previously hard-coded here should be treated as
# compromised and regenerated.
# NOTE(review): the value formerly assigned to ACCESS_SECRET had the
# "<numeric id>-<string>" shape, and the authentication call passes
# ACCESS_SECRET before ACCESS_TOKEN, so these two names look swapped. The
# names are kept unchanged so the existing call keeps working -- confirm and
# rename together with that call.
CONSUMER_SECRET <- Sys.getenv("TWITTER_CONSUMER_SECRET")
CONSUMER_KEY <- Sys.getenv("TWITTER_CONSUMER_KEY")
ACCESS_SECRET <- Sys.getenv("TWITTER_ACCESS_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")

# Sys.getenv() returns "" for unset variables; say so early rather than
# leaving the reader to decode an authentication failure later.
if (!all(nzchar(c(CONSUMER_SECRET, CONSUMER_KEY, ACCESS_SECRET, ACCESS_TOKEN)))) {
  message("Twitter credentials are not fully set; authentication will fail.")
}

connect to twitter app

# Authenticate against the Twitter API. Arguments are named so the mapping
# from variables to parameters is explicit.
# NOTE(review): the variable called ACCESS_SECRET is supplied as access_token
# and ACCESS_TOKEN as access_secret, exactly as in the positional form of this
# call -- confirm the variables are named for what they actually hold.
setup_twitter_oauth(
  consumer_key = CONSUMER_KEY,
  consumer_secret = CONSUMER_SECRET,
  access_token = ACCESS_SECRET,
  access_secret = ACCESS_TOKEN
)
## [1] "Using direct authentication"

set twitter user

# Look up the profile of the account to analyse.
twitterUser <- getUser(user = "weareoneEXO")

# Report the class of the returned object.
class(twitterUser)
## [1] "user"
## attr(,"package")
## [1] "twitteR"
# List every field of the user object.
str(twitterUser)
## Reference class 'user' [package "twitteR"] with 18 fields
##  $ description      : chr "EXO (<U+C5D1><U+C18C>) Official We are ONE<U+0001F44D>EXO <U+C0AC><U+B791><U+D558><U+C790>!"
##  $ statusesCount    : num 3067
##  $ followersCount   : num 11116141
##  $ favoritesCount   : num 166
##  $ friendsCount     : num 4
##  $ url              : chr "https://t.co/qbCx4jCmGW"
##  $ name             : chr "EXO"
##  $ created          : POSIXct[1:1], format: "2017-06-09 09:51:57"
##  $ protected        : logi FALSE
##  $ verified         : logi TRUE
##  $ screenName       : chr "weareoneEXO"
##  $ location         : chr ""
##  $ lang             : chr(0) 
##  $ id               : chr "873115441303924736"
##  $ lastStatus       :Reference class 'status' [package "twitteR"] with 17 fields
##   ..$ text         : chr "<U+203B> <U+D065><U+D29C><U+BE0C><U+B294> <U+B9E4><U+C6D4> 17<U+C77C> <U+C624><U+D6C4> 7<U+C2DC> <U+C5C5><U+B85"| __truncated__
##   ..$ favorited    : logi FALSE
##   ..$ favoriteCount: num 36862
##   ..$ replyToSN    : chr "weareoneEXO"
##   ..$ created      : POSIXct[1:1], format: "2021-05-13 10:02:48"
##   ..$ truncated    : logi TRUE
##   ..$ replyToSID   : chr "1392782370793615364"
##   ..$ id           : chr "1392782372391718916"
##   ..$ replyToUID   : chr "873115441303924736"
##   ..$ statusSource : chr "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>"
##   ..$ screenName   : chr "Unknown"
##   ..$ retweetCount : num 14937
##   ..$ isRetweet    : logi FALSE
##   ..$ retweeted    : logi FALSE
##   ..$ longitude    : chr(0) 
##   ..$ latitude     : chr(0) 
##   ..$ urls         :'data.frame':    1 obs. of  5 variables:
##   .. ..$ url         : chr "https://t.co/6Jl1tra543"
##   .. ..$ expanded_url: chr "https://twitter.com/i/web/status/1392782372391718916"
##   .. ..$ display_url : chr "twitter.com/i/web/status/1…"
##   .. ..$ start_index : num 117
##   .. ..$ stop_index  : num 140
##   ..and 53 methods, of which 39 are  possibly relevant:
##   ..  getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
##   ..  getLatitude, getLongitude, getReplyToSID, getReplyToSN, getReplyToUID,
##   ..  getRetweetCount, getRetweeted, getRetweeters, getRetweets, getScreenName,
##   ..  getStatusSource, getText, getTruncated, getUrls, initialize, setCreated,
##   ..  setFavoriteCount, setFavorited, setId, setIsRetweet, setLatitude,
##   ..  setLongitude, setReplyToSID, setReplyToSN, setReplyToUID,
##   ..  setRetweetCount, setRetweeted, setScreenName, setStatusSource, setText,
##   ..  setTruncated, setUrls, toDataFrame, toDataFrame#twitterObj
##  $ listedCount      : num 13730
##  $ followRequestSent: logi FALSE
##  $ profileImageUrl  : chr "http://pbs.twimg.com/profile_images/1391770003339300864/qJWPFDbj_normal.jpg"
##  and 59 methods, of which 45 are  possibly relevant:
##    getCreated, getDescription, getFavorites, getFavoritesCount,
##    getFavouritesCount, getFollowerIDs, getFollowers, getFollowersCount,
##    getFollowRequestSent, getFriendIDs, getFriends, getFriendsCount, getId,
##    getLang, getLastStatus, getListedCount, getLocation, getName,
##    getProfileImageUrl, getProtected, getScreenName, getStatusesCount, getUrl,
##    getVerified, initialize, setCreated, setDescription, setFavoritesCount,
##    setFollowersCount, setFollowRequestSent, setFriendsCount, setId, setLang,
##    setLastStatus, setListedCount, setLocation, setName, setProfileImageUrl,
##    setProtected, setScreenName, setStatusesCount, setUrl, setVerified,
##    toDataFrame, toDataFrame#twitterObj

Extract tweets from user’s timeline

# Three progressively larger requests; each assignment replaces the previous
# result, so only the last one is used by the code that follows.

# 10 most recent tweets with the default retweet handling.
tweets <- userTimeline(twitterUser, n = 10)

# Same request, explicitly including retweets (TRUE, not the reassignable T).
tweets <- userTimeline(twitterUser, n = 10, includeRts = TRUE)

# Ask for up to 3200 tweets. The stray trailing comma that passed an empty
# extra argument has been removed.
tweets <- userTimeline(twitterUser, n = 3200, includeRts = TRUE)

display attributes and function of tweet object

# Print the reference-class definition (fields and methods) of the first
# tweet object in the list.
tweets[[1]]$getClass()
## Reference Class "status":
## 
## Class fields:
##                                                                             
## Name:           text     favorited favoriteCount     replyToSN       created
## Class:     character       logical       numeric     character       POSIXct
##                                                                             
## Name:      truncated    replyToSID            id    replyToUID  statusSource
## Class:       logical     character     character     character     character
##                                                                             
## Name:     screenName  retweetCount     isRetweet     retweeted     longitude
## Class:     character       numeric       logical       logical     character
##                                   
## Name:       latitude          urls
## Class:     character    data.frame
## 
## Class Methods: 
##      "setUrls", "getRetweets", "getRefClass", "getUrls", "setTruncated", 
##      "setText", "getReplyToSID", "getText", "export", "setCreated", 
##      "setFavoriteCount", "getCreated", "initialize", "callSuper", 
##      "getRetweeters", "initFields", "getClass", "setReplyToUID", "import", 
##      "setLatitude", "setIsRetweet", "getFavoriteCount", "getRetweetCount", 
##      "getIsRetweet", "setId", "setScreenName", "getLatitude", "getScreenName", 
##      "toDataFrame#twitterObj", "setRetweetCount", "setReplyToSID", "getId", 
##      "getReplyToUID", "setFavorited", "getRetweeted", "getFavorited", 
##      "toDataFrame", "setStatusSource", "setReplyToSN", "copy", "usingMethods", 
##      "setRetweeted", "field", ".objectParent", "getTruncated", "untrace", 
##      "trace", "setLongitude", "getLongitude", "getStatusSource", 
##      ".objectPackage", "getReplyToSN", "show"
## 
## Reference Superclasses: 
##      "twitterObj", "envRefClass"

# Class of a single tweet object (the closing parenthesis was missing, which
# made this line a parse error).
class(tweets[[1]])

display the main body (text) of tweets 1 to 5

# `tweets` is a list of status objects, so `tweets$text` is NULL and the
# previous expression displayed nothing. Pull the `text` field out of each of
# the first five tweets instead; head() copes with fewer than five tweets.
# The result is a character vector with one element per tweet.
vapply(head(tweets, 5), function(status) status$text, character(1))

display main body text of tweet

# Text of the fifth tweet, read through the object's accessor method.
tweets[[5]]$getText()
## [1] "<U+0001F4BF><U+0001F6D2> \n\n[Photo Book Ver.2]\n\nYES24<U+27AB> https://t.co/yrhKdDA5Yn \nAladin<U+27AB> https://t.co/h2hl0onhXY \nHottracks<U+27AB>… https://t.co/aYnDHXhZ4P"

display favorite count

# Favorite count of the first tweet, read through the accessor method.
tweets[[1]]$getFavoriteCount()
## [1] 36862

display retweet count

# Retweet count of the first tweet, read through the accessor method.
tweets[[1]]$getRetweetCount()
## [1] 14937

check current rate limit (Check /statuses/user_timeline)

# Only the timeline endpoint is of interest here, so request the "statuses"
# resource family and show just the /statuses/user_timeline row rather than
# printing every rate limit.
rate_limits <- getCurRateLimitInfo(resources = "statuses")
rate_limits[rate_limits$resource == "/statuses/user_timeline", ]

extract tweets based on a search term

# Hashtag to search for.
searchTerm <- "#EXO"

# Request up to 5000 English-language tweets matching the search term.
trendingTweets <- searchTwitter(
  searchString = searchTerm,
  n = 5000,
  lang = "en"
)

# The search result is a list of tweet objects.
class(trendingTweets)
## [1] "list"
head(trendingTweets)
## [[1]]
## [1] "sundaemorningss: @PhixoBar @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #<U+C2DC><U+C6B0><U+BBFC>\n#EXO #<U+C5D1><U+C18C> \n@weareoneEXO"
## 
## [[2]]
## [1] "CalliopeCiello: RT @exoluniverseph: EXO-Ls, Please help us win some funds for @weareoneEXO's Comeback <U+0001F64F><U+0001F3FC>\n\nPlease rt and like our tweet and make sure to fol…"
## 
## [[3]]
## [1] "dviviofdsehun: RT @CITYLIGHTS_LIT: @dviviofdsehun @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #<U+C2DC><U+C6B0><U+BBFC>\n#EXO #<U+C5D1><U+C18C> \n@weareoneEXO"
## 
## [[4]]
## [1] "fffirelight: RT @PHIXOT9: Eris! We got this deal! Let us unite to win this another fight <U+2764><U+FE0F> Like and rt below!! Let us do this for our boys we can do it…"
## 
## [[5]]
## [1] "xmyeonmin: @aerieya @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #<U+C2DC><U+C6B0><U+BBFC>\n#EXO #<U+C5D1><U+C18C> \n@weareoneEXO"
## 
## [[6]]
## [1] "12IRBLDR13: RT @exoluniverseph: EXO-Ls, Please help us win some funds for @weareoneEXO's Comeback <U+0001F64F><U+0001F3FC>\n\nPlease rt and like our tweet and make sure to fol…"
# Inspect the fields of the first tweet returned by the search.
str(trendingTweets[[1]])
## Reference class 'status' [package "twitteR"] with 17 fields
##  $ text         : chr "@PhixoBar @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #<U+C2DC><U+C6B0><U+BBFC>\n#EXO #<U+C5D1><U+C18C> \n@weareoneEXO"
##  $ favorited    : logi FALSE
##  $ favoriteCount: num 0
##  $ replyToSN    : chr "PhixoBar"
##  $ created      : POSIXct[1:1], format: "2021-05-14 02:06:32"
##  $ truncated    : logi FALSE
##  $ replyToSID   : chr "1393024821823344640"
##  $ id           : chr "1393024901301235714"
##  $ replyToUID   : chr "1369318888538771457"
##  $ statusSource : chr "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>"
##  $ screenName   : chr "sundaemorningss"
##  $ retweetCount : num 0
##  $ isRetweet    : logi FALSE
##  $ retweeted    : logi FALSE
##  $ longitude    : chr(0) 
##  $ latitude     : chr(0) 
##  $ urls         :'data.frame':   0 obs. of  4 variables:
##   ..$ url         : chr(0) 
##   ..$ expanded_url: chr(0) 
##   ..$ dispaly_url : chr(0) 
##   ..$ indices     : num(0) 
##  and 53 methods, of which 39 are  possibly relevant:
##    getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
##    getLatitude, getLongitude, getReplyToSID, getReplyToSN, getReplyToUID,
##    getRetweetCount, getRetweeted, getRetweeters, getRetweets, getScreenName,
##    getStatusSource, getText, getTruncated, getUrls, initialize, setCreated,
##    setFavoriteCount, setFavorited, setId, setIsRetweet, setLatitude,
##    setLongitude, setReplyToSID, setReplyToSN, setReplyToUID, setRetweetCount,
##    setRetweeted, setScreenName, setStatusSource, setText, setTruncated,
##    setUrls, toDataFrame, toDataFrame#twitterObj

convert the trending tweets list into a data frame (trendingTweets.df)

# Flatten the list of tweet objects into a data frame, one row per tweet.
trendingTweets.df <- twListToDF(trendingTweets)

# Convert the tweet text to UTF-8. iconv() is vectorised, so it is applied to
# the whole column at once instead of element by element through sapply().
trendingTweets.df$text <- iconv(trendingTweets.df$text, to = "UTF-8")

# Keep a copy on disk so the analysis can be rerun without querying the API.
save(trendingTweets.df, file = "trendingTweets202105.Rda")

# Show the text of the first tweet.
trendingTweets.df$text[[1]]
## [1] "@PhixoBar @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #시우민\n#EXO #엑소 \n@weareoneEXO"

1. library(twitteR)

2. library(tm)

3. library(stringr)

4. library(wordcloud)

5. library(syuzhet)

6. library(RColorBrewer)

7. library(ggplot2)

library(twitteR)
library(wordcloud)
## Loading required package: RColorBrewer
library(RColorBrewer)
library(tm)
## Loading required package: NLP
library(stringr)
library(syuzhet)

#accounts which tweet about EXO

# Build a corpus whose documents are the screen names of the tweeting
# accounts, keeping the intermediate source object so it can be inspected.
screen_name_source <- VectorSource(trendingTweets.df$screenName)
namesCorpus <- Corpus(screen_name_source)

# Classes of the raw column, the source wrapper and the resulting corpus.
class(trendingTweets.df$screenName)
## [1] "character"
class(screen_name_source)
## [1] "VectorSource" "SimpleSource" "Source"
class(namesCorpus)
## [1] "SimpleCorpus" "Corpus"
# The text column was already converted to UTF-8 when the data frame was
# built. Running the same native-to-UTF-8 conversion a second time appears to
# re-encode the already-converted bytes and garble every non-ASCII character,
# so the conversion is deliberately not repeated here.
head(trendingTweets.df$text)
## [1] "@PhixoBar @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #시우민\n#EXO #엑소 \n@weareoneEXO"
## [2] "RT @exoluniverseph: EXO-Ls, Please help us win some funds for @weareoneEXO's Comeback 🙏🏼\n\nPlease rt and like our tweet and make sure to fol…"
## [3] "RT @CITYLIGHTS_LIT: @dviviofdsehun @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #시우민\n#EXO #엑소 \n@weareoneEXO"
## [4] "RT @PHIXOT9: Eris! We got this deal! Let us unite to win this another fight ❤️ Like and rt below!! Let us do this for our boys we can do it…"
## [5] "@aerieya @weareoneEXO ACTOR XIUMIN IS BACK\n\n#XIUMIN #시우민\n#EXO #엑소 \n@weareoneEXO"
## [6] "RT @exoluniverseph: EXO-Ls, Please help us win some funds for @weareoneEXO's Comeback 🙏🏼\n\nPlease rt and like our tweet and make sure to fol…"

#Cleaning the data

# Replace every @mention with a space. The result is held in a data frame so
# that further columns can be added with `$`; assigning a `$` element on a
# bare character vector silently coerces it to a list.
nohandles <- data.frame(
  text = str_replace_all(trendingTweets.df$text, "@\\w+", " "),
  stringsAsFactors = FALSE
)

# Replace each URL with a space. Only the URL itself is matched: the previous
# greedy "http.*" pattern also discarded all text following the first link,
# and a second "https.*" pass could never match anything the first had left.
nohandles$cleanedText <- gsub("https?://[^[:space:]]+", " ", nohandles$text)

head(nohandles$cleanedText)
## [1] "    ACTOR XIUMIN IS BACK\n\n#XIUMIN #ìâ\200¹Å“우민\n#EXO #ìâ\200”â\200\230ìâ\200 Å’ \n "                                                                            
## [2] "RT  : EXO-Ls, Please help us win some funds for  's Comeback ðŸ™Â\217ðŸÂ\217¼\n\nPlease rt and like our tweet and make sure to fol…"                     
## [3] "RT  :     ACTOR XIUMIN IS BACK\n\n#XIUMIN #ìâ\200¹Å“우민\n#EXO #ìâ\200”â\200\230ìâ\200 Å’ \n "                                                                      
## [4] "RT  : Eris! We got this deal! Let us unite to win this another fight âÂ\235¤ï¸Â\217 Like and rt below!! Let us do this for our boys we can do it…"          
## [5] "    ACTOR XIUMIN IS BACK\n\n#XIUMIN #ìâ\200¹Å“우민\n#EXO #ìâ\200”â\200\230ìâ\200 Å’ \n "                                                                            
## [6] "RT  : EXO-Ls, Please help us win some funds for  's Comeback ðŸ™Â\217ðŸÂ\217¼\n\nPlease rt and like our tweet and make sure to fol…"
# Replace every character that is not an ASCII letter or digit with a space.
# One pass is enough: the earlier "[^[:alnum:]]" pass removed a subset of what
# this pattern removes, and the nested "[[^...]]" bracket form is written here
# as a plain negated class with the same effect.
nohandles$cleanedText <- str_replace_all(nohandles$cleanedText, "[^a-zA-Z0-9]", " ")

head(nohandles$cleanedText)
## [1] "    ACTOR XIUMIN IS BACK   XIUMIN                       EXO                    "                                                                      
## [2] "RT    EXO Ls  Please help us win some funds for   s Comeback                    Please rt and like our tweet and make sure to fol       "             
## [3] "RT        ACTOR XIUMIN IS BACK   XIUMIN                       EXO                    "                                                                
## [4] "RT    Eris  We got this deal  Let us unite to win this another fight              Like and rt below   Let us do this for our boys we can do it       "
## [5] "    ACTOR XIUMIN IS BACK   XIUMIN                       EXO                    "                                                                      
## [6] "RT    EXO Ls  Please help us win some funds for   s Comeback                    Please rt and like our tweet and make sure to fol       "
# Wrap the cleaned tweet text in a vector source and build the word corpus
# from it.
cleaned_text_source <- VectorSource(nohandles$cleanedText)
wordCorpus <- Corpus(cleaned_text_source)

# Content of the first document in the corpus.
wordCorpus[[1]]$content
## [1] "    ACTOR XIUMIN IS BACK   XIUMIN                       EXO                    "

#removing punctuations

# Strip punctuation, then show the first document. These were fused into a
# single expression that tried to call the corpus returned by tm_map() as if
# it were a function, which fails at run time.
wordCorpus <- tm_map(wordCorpus, removePunctuation)
wordCorpus[[1]]$content

# Drop digits.
wordCorpus <- tm_map(wordCorpus, removeNumbers)

# Lower-case the text; tolower is a base function, so it is wrapped with
# content_transformer() before being handed to tm_map().
wordCorpus <- tm_map(wordCorpus, content_transformer(tolower))

wordCorpus[[1]]$content

#removing words such as “a”, “an”, “the”, “is”, etc

# Remove common English stop words, first with the default English list and
# then with the SMART list. The list names are now written with plain ASCII
# double quotes; the typographic quotes used before are not valid R syntax.
wordCorpus <- tm_map(wordCorpus, removeWords, stopwords("english"))

wordCorpus <- tm_map(wordCorpus, removeWords, stopwords("SMART"))

wordCorpus[[1]]$content

#manual removing of words that cannot be read

# Remove the leftover token "amp" from every document.
wordCorpus <- tm_map(wordCorpus, removeWords, c("amp"))
## Warning in tm_map.SimpleCorpus(wordCorpus, removeWords, c("amp")):
## transformation drops documents
# Remove the token "ste" as well.
wordCorpus <- tm_map(wordCorpus, removeWords, c("ste"))#manual assignment
## Warning in tm_map.SimpleCorpus(wordCorpus, removeWords, c("ste")):
## transformation drops documents
# Show the fifth document before whitespace is collapsed.
# NOTE(review): the echoed text is still upper-case and still contains "IS",
# which suggests the lower-casing and stop-word steps had not taken effect in
# the run that produced this output -- confirm by re-knitting.
wordCorpus[[5]]$content
## [1] "    ACTOR XIUMIN IS BACK   XIUMIN                       EXO                    "
# Collapse runs of whitespace into single spaces.
wordCorpus <- tm_map(wordCorpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(wordCorpus, stripWhitespace): transformation
## drops documents
# Show the first document after whitespace has been collapsed.
wordCorpus[[1]]$content
## [1] " ACTOR XIUMIN IS BACK XIUMIN EXO "

# Inspect the internal structure of the finished corpus.
str(wordCorpus)

library(wordcloud)

# Nine-colour "YlGnBu" palette with its first four entries dropped.
# NOTE(review): `pal` is not passed to wordcloud() below, which uses the
# "Dark2" palette -- confirm whether it is still needed.
pal <- brewer.pal(9, "YlGnBu")[-(1:4)]

# Fix the random seed so the word layout is reproducible.
set.seed(123)

# Draw on a single-panel plotting device.
par(mfrow = c(1, 1))

# Plot up to 500 words from the corpus, most frequent first, with 40% of the
# words rotated.
wordcloud(
  words = wordCorpus,
  scale = c(5, 0.3),
  max.words = 500,
  random.order = FALSE,
  rot.per = 0.4,
  use.r.layout = FALSE,
  font.main = 5,
  cex.main = 1.5,
  colors = brewer.pal(8, "Dark2")
)