Package loading:
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages -------------------------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.3 v purrr 0.3.4
v tibble 3.0.4 v dplyr 1.0.2
v tidyr 1.1.2 v stringr 1.4.0
v readr 1.4.0 v forcats 0.5.0
-- Conflicts ----------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(DT)
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
library(plotly) # This package does interactive graphs
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
library(rtweet) # This package accesses Twitter data
Attaching package: 'rtweet'
The following object is masked from 'package:purrr':
flatten
library(lubridate)
Attaching package: 'lubridate'
The following objects are masked from 'package:base':
date, intersect, setdiff, union
# Search Twitter for tweets matching "ladygaga" (up to 5000), excluding
# retweets. Spell out FALSE: `F` is an ordinary variable that can be reassigned.
gaga_tweets <- search_tweets("ladygaga", n = 5000, include_rts = FALSE)
Downloading [=>---------------------------------------] 4%
Downloading [=>---------------------------------------] 6%
Downloading [==>--------------------------------------] 8%
Downloading [===>-------------------------------------] 10%
Downloading [====>------------------------------------] 12%
Downloading [=====>-----------------------------------] 14%
Downloading [======>----------------------------------] 16%
Downloading [======>----------------------------------] 18%
Downloading [=======>---------------------------------] 20%
Downloading [========>--------------------------------] 22%
Downloading [=========>-------------------------------] 24%
Downloading [==========>------------------------------] 26%
Downloading [==========>------------------------------] 28%
Downloading [===========>-----------------------------] 30%
Downloading [============>----------------------------] 32%
Downloading [=============>---------------------------] 34%
Downloading [==============>--------------------------] 36%
Downloading [===============>-------------------------] 38%
Downloading [===============>-------------------------] 40%
Downloading [================>------------------------] 42%
Downloading [=================>-----------------------] 44%
Downloading [==================>----------------------] 46%
Downloading [===================>---------------------] 48%
Downloading [===================>---------------------] 50%
Downloading [====================>--------------------] 52%
Downloading [=====================>-------------------] 54%
Downloading [======================>------------------] 56%
Downloading [=======================>-----------------] 58%
Downloading [========================>----------------] 60%
Downloading [========================>----------------] 62%
Downloading [=========================>---------------] 64%
Downloading [==========================>--------------] 66%
Downloading [===========================>-------------] 68%
Downloading [============================>------------] 70%
Downloading [=============================>-----------] 72%
Downloading [=============================>-----------] 74%
Downloading [==============================>----------] 76%
Downloading [===============================>---------] 78%
Downloading [================================>--------] 80%
Downloading [=================================>-------] 82%
Downloading [=================================>-------] 84%
Downloading [==================================>------] 86%
Downloading [===================================>-----] 88%
Downloading [====================================>----] 90%
Downloading [=====================================>---] 92%
Downloading [======================================>--] 94%
Downloading [======================================>--] 96%
Downloading [=======================================>-] 98%
Downloading [=========================================] 100%
# Print a compact, column-by-column overview of the downloaded tweets.
gaga_tweets %>%
  glimpse()
Rows: 4,924
Columns: 90
$ user_id <chr> "1303330718660128768", "1303330718660128768", "18580000...
$ status_id <chr> "1362800731846287363", "1362800161244794885", "13628007...
$ created_at <dttm> 2021-02-19 16:26:28, 2021-02-19 16:24:12, 2021-02-19 1...
$ screen_name <chr> "Amya45944248", "Amya45944248", "veronicawillyxo", "deb...
$ text <chr> "@Oreo @ladygaga bruh these oreos ain't it it taste lik...
$ source <chr> "Twitter Web App", "Twitter Web App", "Twitter for iPho...
$ display_text_width <dbl> 93, 18, 51, 18, 50, 12, 105, 26, 25, 35, 33, 80, 27, 34...
$ reply_to_status_id <chr> "1354896214144114700", "1354806780979605505", NA, "1362...
$ reply_to_user_id <chr> "126084292", "14230524", NA, "1042139340032356352", NA,...
$ reply_to_screen_name <chr> "Oreo", "ladygaga", NA, "KellyClarksonTV", NA, "gobshit...
$ is_quote <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, F...
$ is_retweet <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,...
$ favorite_count <int> 0, 0, 0, 0, 2, 0, 1, 1, 0, 1, 4, 6, 3, 25, 5, 4, 1, 2, ...
$ retweet_count <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0...
$ quote_count <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ reply_count <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ hashtags <list> [NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Chromati...
$ symbols <list> [NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ urls_url <list> [NA, NA, NA, NA, "twitter.com/kellyclarksont…", NA, "t...
$ urls_t.co <list> [NA, NA, NA, NA, "https://t.co/L9Cb8aShCk", NA, "https...
$ urls_expanded_url <list> [NA, NA, NA, NA, "https://twitter.com/kellyclarksontv/...
$ media_url <list> [NA, NA, NA, NA, NA, "http://pbs.twimg.com/tweet_video...
$ media_t.co <list> [NA, NA, NA, NA, NA, "https://t.co/9i20m3cwc0", NA, NA...
$ media_expanded_url <list> [NA, NA, NA, NA, NA, "https://twitter.com/projoepat/st...
$ media_type <list> [NA, NA, NA, NA, NA, "photo", NA, NA, NA, NA, NA, "pho...
$ ext_media_url <list> [NA, NA, NA, NA, NA, "http://pbs.twimg.com/tweet_video...
$ ext_media_t.co <list> [NA, NA, NA, NA, NA, "https://t.co/9i20m3cwc0", NA, NA...
$ ext_media_expanded_url <list> [NA, NA, NA, NA, NA, "https://twitter.com/projoepat/st...
$ ext_media_type <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ mentions_user_id <list> [<"126084292", "14230524">, <"14230524", "126084292">,...
$ mentions_screen_name <list> [<"Oreo", "ladygaga">, <"ladygaga", "Oreo">, "ladygaga...
$ lang <chr> "en", "fr", "en", "pt", "en", "en", "en", "en", "en", "...
$ quoted_status_id <chr> NA, NA, NA, NA, "1362763878422048769", NA, "13627903067...
$ quoted_text <chr> NA, NA, NA, NA, "Kicking the weekend off early with thi...
$ quoted_created_at <dttm> NA, NA, NA, NA, 2021-02-19 14:00:02, NA, 2021-02-19 15...
$ quoted_source <chr> NA, NA, NA, NA, "Twitter Media Studio", NA, "Twitter fo...
$ quoted_favorite_count <int> NA, NA, NA, NA, 524, NA, 1021, NA, NA, 675, 3466, NA, 3...
$ quoted_retweet_count <int> NA, NA, NA, NA, 121, NA, 138, NA, NA, 145, 213, NA, 213...
$ quoted_user_id <chr> NA, NA, NA, NA, "1042139340032356352", NA, "10289455646...
$ quoted_screen_name <chr> NA, NA, NA, NA, "KellyClarksonTV", NA, "spotify_data", ...
$ quoted_name <chr> NA, NA, NA, NA, "The Kelly Clarkson Show", NA, "Spotify...
$ quoted_followers_count <int> NA, NA, NA, NA, 79545, NA, 80889, NA, NA, 2566285, 1721...
$ quoted_friends_count <int> NA, NA, NA, NA, 622, NA, 1, NA, NA, 247880, 189, NA, 18...
$ quoted_statuses_count <int> NA, NA, NA, NA, 6001, NA, 4212, NA, NA, 320499, 27310, ...
$ quoted_location <chr> NA, NA, NA, NA, "", NA, "", NA, NA, "Los Angeles, CA", ...
$ quoted_description <chr> NA, NA, NA, NA, "Weekdays! \U0001f33b\u2b07️", NA, "You...
$ quoted_verified <lgl> NA, NA, NA, NA, TRUE, NA, FALSE, NA, NA, TRUE, TRUE, NA...
$ retweet_status_id <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_text <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_created_at <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ retweet_source <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_favorite_count <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_retweet_count <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_user_id <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_screen_name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_followers_count <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_friends_count <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_statuses_count <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_location <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_description <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ retweet_verified <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ place_url <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ place_name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ place_full_name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ place_type <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ country <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ country_code <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ geo_coords <list> [<NA, NA>, <NA, NA>, <NA, NA>, <NA, NA>, <NA, NA>, <NA...
$ coords_coords <list> [<NA, NA>, <NA, NA>, <NA, NA>, <NA, NA>, <NA, NA>, <NA...
$ bbox_coords <list> [<NA, NA, NA, NA, NA, NA, NA, NA>, <NA, NA, NA, NA, NA...
$ status_url <chr> "https://twitter.com/Amya45944248/status/13628007318462...
$ name <chr> "itzme_amya", "itzme_amya", "veronica willy", "Débora B...
$ location <chr> "", "", "Miami Beach, FL", "Chromatica", "", "", "", ""...
$ description <chr> "follow me on insta bad_lul_love", "follow me on insta ...
$ url <chr> NA, NA, "https://t.co/TcU72Qumgh", "https://t.co/ffVfza...
$ protected <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,...
$ followers_count <int> 0, 0, 253, 535, 989, 989, 2506, 2506, 2506, 2506, 2506,...
$ friends_count <int> 1, 1, 92, 512, 1534, 1534, 4982, 4982, 4982, 4982, 4982...
$ listed_count <int> 0, 0, 2, 4, 16, 16, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,...
$ statuses_count <int> 7, 7, 3018, 9654, 16572, 16572, 58746, 58746, 58746, 58...
$ favourites_count <int> 10, 10, 4292, 69139, 71094, 71094, 86452, 86452, 86452,...
$ account_created_at <dttm> 2020-09-08 13:54:09, 2020-09-08 13:54:09, 2013-09-12 1...
$ verified <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,...
$ profile_url <chr> NA, NA, "https://t.co/TcU72Qumgh", "https://t.co/ffVfza...
$ profile_expanded_url <chr> NA, NA, "https://www.youtube.com/user/vwilkinsable", "h...
$ account_lang <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ profile_banner_url <chr> NA, NA, "https://pbs.twimg.com/profile_banners/18580000...
$ profile_background_url <chr> NA, NA, "http://abs.twimg.com/images/themes/theme1/bg.p...
$ profile_image_url <chr> "http://pbs.twimg.com/profile_images/130502254170302054...
Lady Gaga is considered one of the top female artists in the music industry. Not only is she musically versatile, but she is also well known for her eccentric wardrobe and dramatic makeup. Lady Gaga is one of the top 10 Twitter users and is followed by over 80 million people. In this analysis, we will take a look at her tweeting habits. The following table lists the 25 most popular (most retweeted) tweets about Lady Gaga. The top tweet indicates she is missed.
# Interactive table of the 25 most-retweeted tweets in the search results.
gaga_tweets %>%
  select(text, retweet_count) %>%
  # Rank explicitly by retweet_count rather than letting top_n() fall back to
  # the last column; ties at the cut-off are kept (with_ties defaults to TRUE).
  slice_max(retweet_count, n = 25) %>%
  arrange(desc(retweet_count)) %>% # sort in descending order of popularity
  datatable()
Selecting by retweet_count
Next we will look at which hashtags Lady Gaga uses in her own tweets. The hashtag “joanne” appears to be the second most tweeted item. Joanne is Lady Gaga’s middle name, as Lady Gaga is just a stage name.
# Replace the search results with Lady Gaga's own timeline; the analyses from
# here on work on her tweets rather than on tweets about her.
# NOTE(review): the timeline endpoint is believed to cap results at roughly
# 3200 tweets, so n = 5000 may return fewer rows -- confirm against rtweet docs.
gaga_tweets <- get_timeline("LadyGaga", n = 5000)

# Count how often each hashtag appears in her tweets.
gaga_tweets %>%
  select(hashtags) %>% # focus on the hashtags
  # `hashtags` is a list-column; name it explicitly, as current tidyr requires
  unnest(cols = c(hashtags)) %>% # one row per hashtag
  mutate(hashtags = tolower(hashtags)) %>% # make all hashtags lowercase
  count(hashtags, sort = TRUE) %>% # count how often they appear
  datatable() # create an interactive table
`cols` is now required when using unnest().
Please use `cols = c(hashtags)`
Now we will look at how often and when Lady Gaga tweets. She tweeted the most on October 19, 2016. On that day, she got into a Twitter disagreement with The Chainsmokers about the release of her new album, Joanne. Although that was a busy day on Twitter for Lady Gaga, she is not a prolific tweeter: on the days when she tweets, she averages about 3 tweets per day.
# Number of tweets on each calendar day (dates taken from created_at as stored).
# count() returns an ungrouped tibble, so there is no implicit regrouping and
# no `summarise()` message; only days with at least one tweet appear.
gaga_tweets %>%
  count(day = date(created_at), name = "tweets_per_day")
`summarise()` ungrouping output (override with `.groups` argument)
# Average number of tweets per day.
# The mean is taken over days that have at least one tweet: days with no
# tweets never form a row in the per-day counts, so they are not averaged in.
gaga_tweets %>%
  count(day = date(created_at), name = "tweets_per_day") %>%
  summarize(mean_tweets_per_day = mean(tweets_per_day)) # named result column
`summarise()` ungrouping output (override with `.groups` argument)
Next we will determine when and how often Lady Gaga tweets. The histogram below shows how often Lady Gaga tweeted on a daily basis over the course of time.
# Histogram of tweet dates: how many tweets were posted over time.
gaga_tweets %>%
  mutate(day = date(created_at)) %>% # keep only the calendar date
  plot_ly(x = ~day) %>%
  add_histogram() %>%
  # axis titles added so this chart matches the other plots in the notebook
  layout(
    title = "Number of Tweets from @LadyGaga",
    xaxis = list(title = "Date"),
    yaxis = list(title = "Number of Tweets")
  )
Then we determine in the table and graph below that Lady Gaga prefers tweeting around the noon hour (Los Angeles time).
# Report the time zone attached to the tweet timestamps.
gaga_tweets %>%
  pull(created_at) %>%
  tz()
[1] "UTC"
# Table of tweet counts by hour of the day, in Los Angeles local time.
gaga_tweets %>%
  # convert the timestamps to Los Angeles time, then keep the hour (0-23)
  mutate(time = hour(with_tz(created_at, "America/Los_Angeles"))) %>%
  count(time) %>%
  # show all 24 hours on one page; FALSE spelled out instead of F
  datatable(options = list(pageLength = 24), rownames = FALSE)
NA
# Histogram of tweets by hour of the day, in Los Angeles local time.
gaga_tweets %>%
  # shift the timestamps to Los Angeles time and pull out the hour in one step
  mutate(time = hour(with_tz(created_at, "America/Los_Angeles"))) %>%
  plot_ly(x = ~time) %>%
  add_histogram() %>%
  layout(
    title = "What Time of Day Does @LadyGaga Tweet?",
    xaxis = list(title = "Time of Day (0 = midnight)"),
    yaxis = list(title = "Number of Tweets")
  )
Finally, in the table and graphs below, we can see that Lady Gaga tweets the most on Thursdays. However, in determining what day and what hour Lady Gaga tweeted the most, we find that Lady Gaga tweeted the most on a Wednesday at noon.
# Table of tweet counts by day of the week.
gaga_tweets %>%
  mutate(
    # Take the weekday in Los Angeles time, the same zone used for the
    # hour-of-day analyses; the stored UTC time would move evening tweets
    # to the following day.
    Day = wday(
      with_tz(created_at, "America/Los_Angeles"),
      label = TRUE # use labels (Sun, Mon, etc) rather than numbers
    )
  ) %>%
  count(Day) %>% # count the number of tweets each weekday
  datatable(rownames = FALSE)
NA
# Histogram of tweets by day of the week.
gaga_tweets %>%
  mutate(
    # Take the weekday in Los Angeles time, the same zone used for the
    # hour-of-day analyses; the stored UTC time would move evening tweets
    # to the following day.
    Day = wday(
      with_tz(created_at, "America/Los_Angeles"),
      label = TRUE # use labels (Sun, Mon, etc) rather than numbers
    )
  ) %>%
  plot_ly(x = ~Day) %>% # create plotly graph
  add_histogram() %>% # make histogram
  layout(
    title = "What Day Does @LadyGaga Tweet the Most?",
    xaxis = list(title = "Days of the Week"),
    yaxis = list(title = "Number of Tweets")
  )
NA
# 2-D histogram of tweets by weekday and hour of the day.
gaga_tweets %>%
  mutate(
    # Derive both weekday and hour from the same Los Angeles timestamp.
    # Taking the weekday from UTC and the hour from local time would put
    # evening tweets in the wrong day's column.
    local_time = with_tz(created_at, "America/Los_Angeles"),
    day = wday(local_time, label = TRUE),
    hour = hour(local_time)
  ) %>%
  plot_ly(x = ~day, y = ~hour) %>%
  add_histogram2d(nbinsx = 7, nbinsy = 24) %>% # one bin per weekday and hour
  layout(
    title = "What Day and Hour Did @LadyGaga Tweet the Most?",
    xaxis = list(title = "Days of the Week"),
    yaxis = list(title = "Hour of the Day (0 = midnight)") # closing paren restored
  )
NA
NA