We start by installing three new packages (“rtweet,” “httpuv,” and “plotly”) and then “require” them into the environment.
# Install each of the three packages only when it is not already available.
# requireNamespace() checks availability without attaching the package.
if (!requireNamespace("rtweet", quietly = TRUE)) {
  install.packages("rtweet", repos = "https://cran.cnr.berkeley.edu/", dependencies = TRUE)
}
if (!requireNamespace("httpuv", quietly = TRUE)) {
  install.packages("httpuv", repos = "https://cran.cnr.berkeley.edu/", dependencies = TRUE)
}
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly", repos = "https://cran.cnr.berkeley.edu/", dependencies = TRUE)
}
# Attach the packages. library() stops with an error if a package is still
# missing, instead of returning FALSE and letting later calls fail obscurely.
library("rtweet")
library("httpuv")
library("plotly")
Then, please insert your appname, consumerKey, consumerSecret, access_token, and access_secret into the following “character” variables.
# Credentials of your Twitter developer app. Replace each placeholder string
# with your own value; all five are passed to create_token() below.
# Avoid sharing or committing this file once real keys have been filled in.
appname <- "YOUR APP NAME"
consumerKey <- "YOUR CONSUMER KEY"
consumerSecret <- "YOUR CONSUMER SECRET"
access_token <- "YOUR ACCESS TOKEN"
access_secret <- "YOUR ACCESS SECRET"
Set up your Twitter token.
# Create the Twitter API token from the app name and the four keys/secrets
# defined in the character variables above.
twitter_token <- create_token(
  app = appname,
  consumer_key = consumerKey,
  consumer_secret = consumerSecret,
  access_token = access_token,
  access_secret = access_secret,
  set_renv = TRUE
)
Once your Twitter API token is ready, we use the command “search_tweets” to search for tweets containing the keyword “#WuhanCoronavirus.” The returned object (500 tweets requested, no retweets) is a data frame; [5,] selects its 5th row and $COLUMN_NAME extracts the named column.
# Search for tweets containing the hashtag, requesting n = 500 results and
# excluding retweets (include_rts = FALSE).
wuhanc <- search_tweets("#WuhanCoronavirus", n = 500, include_rts = FALSE)
class(wuhanc) # Show its data class: a tibble, which is also a data.frame
## [1] "tbl_df" "tbl" "data.frame"
colnames(wuhanc) # Show all its columns (90 names, listed below)
## [1] "user_id" "status_id"
## [3] "created_at" "screen_name"
## [5] "text" "source"
## [7] "display_text_width" "reply_to_status_id"
## [9] "reply_to_user_id" "reply_to_screen_name"
## [11] "is_quote" "is_retweet"
## [13] "favorite_count" "retweet_count"
## [15] "quote_count" "reply_count"
## [17] "hashtags" "symbols"
## [19] "urls_url" "urls_t.co"
## [21] "urls_expanded_url" "media_url"
## [23] "media_t.co" "media_expanded_url"
## [25] "media_type" "ext_media_url"
## [27] "ext_media_t.co" "ext_media_expanded_url"
## [29] "ext_media_type" "mentions_user_id"
## [31] "mentions_screen_name" "lang"
## [33] "quoted_status_id" "quoted_text"
## [35] "quoted_created_at" "quoted_source"
## [37] "quoted_favorite_count" "quoted_retweet_count"
## [39] "quoted_user_id" "quoted_screen_name"
## [41] "quoted_name" "quoted_followers_count"
## [43] "quoted_friends_count" "quoted_statuses_count"
## [45] "quoted_location" "quoted_description"
## [47] "quoted_verified" "retweet_status_id"
## [49] "retweet_text" "retweet_created_at"
## [51] "retweet_source" "retweet_favorite_count"
## [53] "retweet_retweet_count" "retweet_user_id"
## [55] "retweet_screen_name" "retweet_name"
## [57] "retweet_followers_count" "retweet_friends_count"
## [59] "retweet_statuses_count" "retweet_location"
## [61] "retweet_description" "retweet_verified"
## [63] "place_url" "place_name"
## [65] "place_full_name" "place_type"
## [67] "country" "country_code"
## [69] "geo_coords" "coords_coords"
## [71] "bbox_coords" "status_url"
## [73] "name" "location"
## [75] "description" "url"
## [77] "protected" "followers_count"
## [79] "friends_count" "listed_count"
## [81] "statuses_count" "favourites_count"
## [83] "account_created_at" "verified"
## [85] "profile_url" "profile_expanded_url"
## [87] "account_lang" "profile_banner_url"
## [89] "profile_background_url" "profile_image_url"
# Inspect single cells: [5,] selects the 5th row, $ extracts one column of it.
wuhanc[5,]$text # Text of the 5th status
## [1] "The Communist Party of China has been accused of underreporting coronavirus cases and deaths.\nTaiwan News channel confirms that the death are 25000 actually compared to the 560 declared\n#Hubei #WuhanCoronavirus\n#Corona https://t.co/sp1C8h4nRk"
wuhanc[5,]$screen_name # Screen name of the user who posted this status
## [1] "VU3UJM"
# Indexing with [row, "column"] keeps the result as a tibble (1 x 1 here),
# whereas the $ form above prints a bare character vector.
wuhanc[5,"screen_name"]
## # A tibble: 1 x 1
## screen_name
## <chr>
## 1 VU3UJM
# A row range works the same way: rows 5 to 10 give a 6 x 1 tibble.
wuhanc[5:10,"screen_name"]
## # A tibble: 6 x 1
## screen_name
## <chr>
## 1 VU3UJM
## 2 awesomesundew5
## 3 RahulPariharapj
## 4 nosferatweets
## 5 AtrapadosEl
## 6 GraceCh15554845
wuhanc[5,]$created_at # When this status was created
## [1] "2020-02-06 09:54:30 UTC"
wuhanc[5,]$retweet_count # Number of times this status has been retweeted
## [1] 0
# Compare mean and median of the authors' follower counts; in this sample the
# mean (12454.85) is far above the median (344), i.e. the distribution is
# strongly right-skewed.
mean(wuhanc$followers_count) # mean followers count of the post's authors
## [1] 12454.85
median(wuhanc$followers_count) # median followers count of the post's authors
## [1] 344
plot_ly(x = ~wuhanc$followers_count, type = "histogram") # Histogram of the authors' follower counts
## Exercise: Try retrieving a larger number of tweets by setting a larger n (see the commented examples below)
## Lookup the rate limit table: https://developer.twitter.com/en/docs/basics/rate-limits.html
##rt <- search_tweets("#WuhanCoronavirus", n = 18000)
##rt <- search_tweets("#WuhanCoronavirus", n = 25000, retryonratelimit = TRUE) # beyond 15 mins rate limit
Next, we analyze the profile of the Twitter handle “CGTNOfficial,” which describes itself as “China Global Television Network, or CGTN, is a multi-language, multi-platform media grouping.” Again, the returned values are in a data frame and we can use $ to show each column.
# Look up the profile of the handle; each profile field is a column that
# can be read with $.
cgtn <- lookup_users("CGTNOfficial")
cgtn$name # Display name of the user
## [1] "CGTN"
cgtn$followers_count # Number of followers
## [1] 14068142
cgtn$description # User's self-description
## [1] "China Global Television Network, or CGTN, is a multi-language, multi-platform media grouping."
cgtn$location # Location stated in the profile
## [1] "Beijing, China"
## [1] "Beijing, China"
It is interesting to check who is following this handle. We use the command “get_followers” to collect the list of its followers and use [,] and $ to display the contents of the data frame. Next, we use “lookup_users” to obtain each individual follower’s profile.
# Collect follower ids of the handle. Only 5000 ids come back in this run
# (presumably the function's default limit), so this is a subset of all followers.
cgtn_folls <- get_followers("CGTNOfficial")
nrow(cgtn_folls) # Look at the size of the data
## [1] 5000
head(cgtn_folls) # First 6 followers (a single user_id column)
## # A tibble: 6 x 1
## user_id
## <chr>
## 1 861145634
## 2 1194478097631866880
## 3 923833314431119360
## 4 1098557865307107328
## 5 1081047652865847296
## 6 1100733802735960064
cgtn_folls_data <- lookup_users(cgtn_folls$user_id) # Obtain the followers' profiles
cgtn_folls_data[10,]$screen_name # Show a follower's screen name
## [1] "Adeel5126"
cgtn_folls_data[10,]$location # That follower's location
## [1] "Azad Kashmir Province"
cgtn_folls_data[10,]$followers_count # That follower's followers count
## [1] 45
class(cgtn_folls_data$location) # Check the data class of location
## [1] "character"
# Top-10 most frequent "location" values. head() is used instead of [1:10] so
# that fewer than 10 distinct locations do not produce NA entries.
# NOTE(review): the most frequent value in the recorded output appears to be
# the empty string (no location given) - confirm before interpreting.
head(sort(table(cgtn_folls_data$location), decreasing = TRUE), 10)
##
## People's Republic of China
## 3056 44
## United States India
## 21 15
## Beijing Brazil
## 14 11
## China Nairobi, Kenya
## 11 11
## Guangdong Hong Kong
## 10 10
# Distribution of the followers' own follower counts.
class(cgtn_folls_data$followers_count) # Check the data class of followers_count
## [1] "integer"
# Five-number summary plus mean; in this sample the mean (603.1) is far above
# the median (7), so a few accounts with many followers dominate.
summary(cgtn_folls_data$followers_count) # Show its summary
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 1.0 7.0 603.1 54.0 1130364.0
plot_ly(x = ~cgtn_folls_data$followers_count, type = "histogram") # Histogram of followers_count
Finally, let’s have a look at the Twitter trend data and check the class of the returned data.
# Fetch the trend data for several locations, each looked up by place name.
loc <- trends_available() # locations for which trend data can be requested (not used further below)
sf <- get_trends("San Francisco") # trending topics in San Francisco
ny <- get_trends("New York") # trending topics in New York
tk <- get_trends("Tokyo") # trending topics in Tokyo
kr <- get_trends("Korea") # trending topics in Korea
us <- get_trends("United States") # trending topics in the US
ww <- get_trends("Worldwide") # trending topics all around the world
class(ww) # Check data class - a tibble, which is also a data.frame
## [1] "tbl_df" "tbl" "data.frame"
Last, check the extent to which the Twitter trend of each location is shared with the worldwide one.
# Share of one location's trends that also appear in another location's trends.
# `a %in% b` is a logical vector over a, so mean() gives the proportion of a's
# trends found in b, always normalised by the number of trends in a.
mean(ww$trend %in% sf$trend) # Share of trends@WW that are also trends@SF
## [1] 0.04
mean(ww$trend %in% ny$trend) # Share of trends@WW that are also trends@NY
## [1] 0.06
mean(ww$trend %in% tk$trend) # Share of trends@WW that are also trends@TK
## [1] 0.26
mean(ww$trend %in% kr$trend) # Share of trends@WW that are also trends@KR
## [1] 0.06
mean(ww$trend %in% us$trend) # Share of trends@WW that are also trends@US
## [1] 0.06
# Here the tested set is NY, so the proportion is taken over the NY trends
# (not over the worldwide trends).
mean(ny$trend %in% sf$trend) # Share of trends@NY that are also trends@SF
## [1] 0.86