Start collecting and analyzing some Twitter data
## request up to 3000 statuses carrying the rstats hashtag, leaving out retweets
rt <- rtweet::search_tweets(
  q = "#rstats",
  n = 3000,
  include_rts = FALSE
)
## print a compact, column-by-column preview of the returned data
dplyr::glimpse(rt, width = 10)
## Observations: 2,865
## Variables: 88
## $ user_id <chr> ...
## $ status_id <chr> ...
## $ created_at <dttm> ...
## $ screen_name <chr> ...
## $ text <chr> ...
## $ source <chr> ...
## $ display_text_width <dbl> ...
## $ reply_to_status_id <chr> ...
## $ reply_to_user_id <chr> ...
## $ reply_to_screen_name <chr> ...
## $ is_quote <lgl> ...
## $ is_retweet <lgl> ...
## $ favorite_count <int> ...
## $ retweet_count <int> ...
## $ hashtags <list> ...
## $ symbols <list> ...
## $ urls_url <list> ...
## $ urls_t.co <list> ...
## $ urls_expanded_url <list> ...
## $ media_url <list> ...
## $ media_t.co <list> ...
## $ media_expanded_url <list> ...
## $ media_type <list> ...
## $ ext_media_url <list> ...
## $ ext_media_t.co <list> ...
## $ ext_media_expanded_url <list> ...
## $ ext_media_type <chr> ...
## $ mentions_user_id <list> ...
## $ mentions_screen_name <list> ...
## $ lang <chr> ...
## $ quoted_status_id <chr> ...
## $ quoted_text <chr> ...
## $ quoted_created_at <dttm> ...
## $ quoted_source <chr> ...
## $ quoted_favorite_count <int> ...
## $ quoted_retweet_count <int> ...
## $ quoted_user_id <chr> ...
## $ quoted_screen_name <chr> ...
## $ quoted_name <chr> ...
## $ quoted_followers_count <int> ...
## $ quoted_friends_count <int> ...
## $ quoted_statuses_count <int> ...
## $ quoted_location <chr> ...
## $ quoted_description <chr> ...
## $ quoted_verified <lgl> ...
## $ retweet_status_id <chr> ...
## $ retweet_text <chr> ...
## $ retweet_created_at <dttm> ...
## $ retweet_source <chr> ...
## $ retweet_favorite_count <int> ...
## $ retweet_retweet_count <int> ...
## $ retweet_user_id <chr> ...
## $ retweet_screen_name <chr> ...
## $ retweet_name <chr> ...
## $ retweet_followers_count <int> ...
## $ retweet_friends_count <int> ...
## $ retweet_statuses_count <int> ...
## $ retweet_location <chr> ...
## $ retweet_description <chr> ...
## $ retweet_verified <lgl> ...
## $ place_url <chr> ...
## $ place_name <chr> ...
## $ place_full_name <chr> ...
## $ place_type <chr> ...
## $ country <chr> ...
## $ country_code <chr> ...
## $ geo_coords <list> ...
## $ coords_coords <list> ...
## $ bbox_coords <list> ...
## $ status_url <chr> ...
## $ name <chr> ...
## $ location <chr> ...
## $ description <chr> ...
## $ url <chr> ...
## $ protected <lgl> ...
## $ followers_count <int> ...
## $ friends_count <int> ...
## $ listed_count <int> ...
## $ statuses_count <int> ...
## $ favourites_count <int> ...
## $ account_created_at <dttm> ...
## $ verified <lgl> ...
## $ profile_url <chr> ...
## $ profile_expanded_url <chr> ...
## $ account_lang <chr> ...
## $ profile_banner_url <chr> ...
## $ profile_background_url <chr> ...
## $ profile_image_url <chr> ...
## draw the collected statuses as a time series, then attach labels and theme
ts_plot(data = rt) +
  ggplot2::labs(
    title = "Frequency of #rstats Twitter statuses from past 9 days",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet",
    x = NULL,
    y = NULL
  ) +
  ## the complete theme goes first so the bold-title tweak is not overwritten
  ggplot2::theme_minimal() +
  ggplot2::theme(plot.title = ggplot2::element_text(face = "bold"))

Maps
## search for 1000 tweets sent from the US
rt <- search_tweets(
  "lang:en", geocode = lookup_coords("usa"), n = 1000
)
## create lat/lng variables using all available tweet and profile geo-location data
rt <- lat_lng(rt)
## drop the plot margins, keeping the previous settings so they can be restored
old_par <- par(mar = c(0, 0, 0, 0))
## plot state boundaries
maps::map("state", lwd = .25)
## plot lat and lng points onto state map
with(rt, points(lng, lat, pch = 20, cex = .75, col = rgb(0, .3, .7, .75)))
## restore the margins that were in effect before the map was drawn, so the
## zero-margin setting does not leak into later plots on the same device
par(old_par)

Timelines
## get the most recent 3000 tweets from cnn, BBCWorld, and foxnews
tmls <- get_timelines(c("cnn", "BBCWorld", "foxnews"), n = 3000)
## plot the frequency of tweets for each user over time
tmls %>%
  ## the cutoff is parsed explicitly in UTC; comparing against a bare string
  ## would parse it in the session's local time zone, so the boundary would
  ## shift from machine to machine
  dplyr::filter(created_at > as.POSIXct("2018-10-01", tz = "UTC")) %>%
  ## one series per account
  dplyr::group_by(screen_name) %>%
  ts_plot("days", trim = 1L) +
  ggplot2::geom_point() +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    legend.title = ggplot2::element_blank(),
    legend.position = "bottom",
    plot.title = ggplot2::element_text(face = "bold")
  ) +
  ggplot2::labs(
    x = NULL, y = NULL,
    title = "Frequency of Tweets posted by news organization",
    subtitle = "Tweet counts aggregated by day from October 2018",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )

Trends and Favorite Topics
## Get the 30 most recently favorited tweets by CBS
cbs <- get_favorites("cbs", n = 30)
## show the location and text of the first few rows as a table; select() and
## kable() are namespace-qualified so this snippet does not depend on dplyr or
## knitr being attached and cannot be masked by another package's select()
cbs %>%
  dplyr::select(location, text) %>%
  head() %>%
  knitr::kable()
|
muppet arms Finally!!! Thank you thank you @CBS!!! Cant wait to see you grace my TV again @PhilKeoghan!!! https://t.co/RmxoSdNbLm |
United States |
This is my favorite part of #Rudolph does that make me weird? https://t.co/fXPiBccVol |
Los Angeles |
Clarice had maaaaaaad game. The way she fluttered those eyelashes and whispered in Rudolphs ear…. Those are the moves of a pro. Im over here taking !! #Rudolph #shethinksimcuuuute https://t.co/HQEGHsVVOS |
|
Can’t get enough of ’Rudolph the Red-nosed Reindeer…@cbs https://t.co/iZ6NGClUlc |
Portland, OR |
Time to start playing the ‘Rudolph’ music https://t.co/vASqKMHzil |
Oakland, CA |
Pound 4 Pound still THE best Xmas Special year after year. #Rudolph now 50+ yrs old! https://t.co/3HInygJbSb |
## Discover what's currently trending in NYC.
ny <- get_trends("New York")
## show the trend name and URL of the first few rows as a table; select() and
## kable() are namespace-qualified so this snippet does not depend on dplyr or
## knitr being attached and cannot be masked by another package's select()
ny %>%
  dplyr::select(trend, url) %>%
  head() %>%
  knitr::kable()
References: