library(here)
library(tidyverse)
library(hrbrthemes) # pretty plots
library(extrafont)
# Load the Twitter Analytics export from data/my_tweets2.csv.
# The metric columns listed below get explicit parsers; tweet_id is read as
# character. Columns not named here fall back to readr's default handling.
the_tweets <- read_csv(
  file = here::here("data", "my_tweets2.csv"),
  col_types = cols(
    # identifier
    tweet_id = col_character(),
    # reach and overall engagement
    impressions = col_number(),
    engagements = col_number(),
    engagement_rate = col_double(),
    # reactions
    retweets = col_number(),
    replies = col_number(),
    likes = col_number(),
    follows = col_number(),
    # clicks and expansions
    user_profile_clicks = col_number(),
    url_clicks = col_number(),
    hashtag_clicks = col_number(),
    permalink_clicks = col_number(),
    detail_expands = col_number(),
    # media
    media_views = col_number(),
    media_engagements = col_number()
  )
)
Let’s take a look. For kicks, I’ll arrange them in descending order by engagements, since (according to Twitter) those are the most thrilling.
# Drop exact duplicate rows, then sort from most to fewest engagements.
the_tweets <- arrange(unique(the_tweets), desc(engagements))

# Column-wise overview of the cleaned data.
glimpse(the_tweets)
## Observations: 11,005
## Variables: 19
## $ tweet_id <chr> "847994181800923100", "873650237994348500"...
## $ tweet_permalink <chr> "https://twitter.com/dataandme/status/8479...
## $ tweet_text <chr> "Great resource, examples w/ code: “A Comp...
## $ date <date> 2017-04-01, 2017-06-10, 2017-08-12, 2017-...
## $ time <time> 02:09:00, 21:17:00, 16:21:00, 17:02:00, 1...
## $ impressions <dbl> 45583, 64870, 9864, 40636, 38173, 25683, 2...
## $ engagements <dbl> 4089, 4050, 3750, 3412, 3307, 3235, 3013, ...
## $ engagement_rate <dbl> 0.08970450, 0.06243256, 0.38017032, 0.0839...
## $ retweets <dbl> 243, 279, 2, 191, 171, 76, 164, 174, 105, ...
## $ replies <dbl> 0, 9, 3, 6, 4, 2, 3, 1, 6, 16, 1, 3, 5, 3,...
## $ likes <dbl> 454, 622, 13, 505, 584, 217, 380, 363, 317...
## $ user_profile_clicks <dbl> 240, 248, 14, 132, 167, 149, 108, 178, 109...
## $ url_clicks <dbl> 1065, 671, 40, 737, 675, 853, 939, 692, 56...
## $ hashtag_clicks <dbl> 61, 13, 0, 11, 28, 25, 21, 6, 0, 30, 41, 4...
## $ detail_expands <dbl> 458, 318, 47, 171, 150, 250, 161, 535, 138...
## $ permalink_clicks <dbl> 0, 4, 0, 0, 0, 3, 3, 0, 0, 6, 0, 0, 0, 1, ...
## $ follows <dbl> 6, 5, 0, 2, 0, 1, 1, 6, 1, 0, 2, 0, 1, 0, ...
## $ media_views <dbl> 1558, 1881, 3631, 1656, 1528, 1659, 1233, ...
## $ media_engagements <dbl> 1558, 1881, 3631, 1656, 1528, 1659, 1233, ...
How about a more temporal examination?
suppressPackageStartupMessages(library(lubridate))
# Line chart of tweet counts per week: each tweet's date is floored to its
# week, rows are counted per week, and the counts are drawn as a line.
ggplot(
  data = count(the_tweets, week = floor_date(date, unit = "week")),
  mapping = aes(x = week, y = n)
) +
  geom_line() +
  labs(
    title = "@dataandme tweets per week, 2017",
    caption = "source: Twitter Analytics, retrieved 2017-12-31"
  ) +
  theme_ipsum_ps()
Inspired by Bob Rudis’ (hrbrmstr’s) Year In Review, let’s take a look at my tweet-length distribution.
# devtools::install_github("eclarke/ggbeeswarm")
library(ggbeeswarm)
# Tweet length (characters in tweet_text) plotted against the month of the
# tweet's date, as jittered points with a dotted reference line at 140.
ggplot(
  data = mutate(the_tweets, `Tweet Length` = nchar(tweet_text)),
  mapping = aes(x = month(date), y = `Tweet Length`)
) +
  # this far and no further
  geom_hline(
    yintercept = 140,
    linetype = "dotted",
    size = 0.25,
    color = "#2b2b2b"
  ) +
  geom_quasirandom(
    size = 1,
    shape = 21,
    color = "slategray",
    stroke = 0.1,
    groupOnX = TRUE
  ) +
  labs(x = NULL, title = "Tweet Length Distribution") +
  theme_ipsum_rc(grid = "Y")
Let’s see what that would look like by date.
# Same tweet-length view as a function of the full date rather than the
# month: jittered points plus a dotted reference line at 140 characters.
ggplot(
  data = mutate(the_tweets, `Tweet Length` = nchar(tweet_text)),
  mapping = aes(x = date, y = `Tweet Length`)
) +
  geom_hline(
    yintercept = 140,
    linetype = "dotted",
    size = 0.25,
    color = "#2b2b2b"
  ) +
  geom_quasirandom(
    size = 1,
    shape = 21,
    color = "slategray",
    stroke = 0.1,
    groupOnX = TRUE
  ) +
  labs(x = NULL, title = "Tweet Length Distribution") +
  theme_ipsum_rc(grid = "Y")
How about we take a look at emoji usage with the emo package? [1]
library(emo)
# Total emoji per day: count emoji in each tweet, sum the counts within each
# date, then plot the daily totals with a loess smoother underneath.
the_tweets %>%
  group_by(date) %>%
  summarise(emojis = sum(ji_count(tweet_text))) %>%
  ggplot(mapping = aes(x = date, y = emojis)) +
  geom_smooth(method = "loess", formula = y ~ x) +
  geom_point() +
  labs(title = "@dataandme daily emoji count 2017") +
  theme_ipsum_ps()
Emoji, as inspired by Romain François’ emojistory gist.
suppressPackageStartupMessages(library(magrittr))
# Print one line per emoji used at least 50 times, most frequent first.
the_tweets %>%
  pull(tweet_text) %>%
  # every emoji match in every tweet, flattened to one character vector
  str_extract_all(pattern = emo::ji_rx) %>%
  flatten_chr() %>%
  # frequency table -> two-column tibble (emoji, value)
  table() %>%
  enframe(name = "emoji") %>%
  # look up each emoji's name in emo::jis
  left_join(emo::jis, by = "emoji") %>%
  select(emoji, name, value) %>%
  filter(value >= 50) %>%
  arrange(desc(value)) %$%
  # %$% exposes the columns so glue can interpolate them by name
  writeLines(glue::glue("{emoji} ({name}) - {value} times"))
## 😂 (face with tears of joy) - 413 times
## 📦 (package) - 299 times
## 👍 (thumbs up) - 298 times
## 😉 (winking face) - 209 times
## 😜 (winking face with tongue) - 145 times
## 😳 (flushed face) - 130 times
## 🔥 (fire) - 128 times
## 💻 (laptop computer) - 111 times
## 🙌 (raising hands) - 110 times
## 🤔 (thinking face) - 103 times
## 🤓 (nerd face) - 102 times
## 🏀 (basketball) - 91 times
## 🌟 (glowing star) - 88 times
## 😱 (face screaming in fear) - 84 times
## 👀 (eyes) - 81 times
## 😍 (smiling face with heart-eyes) - 80 times
## 📝 (memo) - 79 times
## 😵 (dizzy face) - 79 times
## ♀️ (female sign) - 74 times
## 🎉 (party popper) - 74 times
## 👩 (woman) - 66 times
## 🙄 (face with rolling eyes) - 63 times
## ✨ (sparkles) - 60 times
## 🤖 (robot face) - 57 times
## 🐦 (bird) - 56 times
## 💥 (collision) - 56 times
## 👨 (man) - 53 times
## 🖤 (black heart) - 53 times
## 📖 (open book) - 52 times
## 🏆 (trophy) - 50 times
In addition to pulling the emoji out of the tweets, I’ll join the emo::jis data frame to get emoji names, and transform a few variables to prepare to use custom images.
library(stringr)
# Tally every emoji found in the tweets, join emo::jis for names and code
# points, and derive the file name / URL pieces used for custom images.
tweet_emoji <- the_tweets %>%
  pull(tweet_text) %>%
  str_extract_all(emo::ji_rx) %>%
  flatten_chr() %>%
  table() %>%
  enframe(name = "emoji") %>%
  left_join(emo::jis, by = "emoji") %>%
  mutate(
    # lower-cased `runes` value from emo::jis
    code = tolower(runes),
    # emoji name with spaces replaced by hyphens
    emoji_name = gsub(" ", "-", name)
  ) %>%
  arrange(desc(value)) %>%
  mutate(
    # emoji as factor, with levels ordered from most to least used
    name = factor(name, levels = name[order(-value)]),
    # image file name: <hyphenated-name>_<code>.png
    emoji_image = paste0(emoji_name, "_", code, ".png"),
    emoji_url = paste0(
      "https://emojipedia-us.s3.amazonaws.com/thumbs/240/apple/118/",
      emoji_image
    )
  )
Now for some fun taken from the kickin’ PRISMOJI “Emoji data science in R”.
# Keep the 15 emoji with the largest `value` (usage count).
df.plot <- tweet_emoji %>%
  top_n(n = 15, wt = value)

# Call grid::rasterGrob() on each selected emoji URL, collecting the results
# in a list.
g <- lapply(df.plot[["emoji_url"]], grid::rasterGrob)
library(ggimage)
# Thin wrapper around geom_image() that tags the layer with
# geom = "twemoji".
#
# Arguments:
#   mapping, data  Passed positionally to geom_image().
#   inherit.aes    Forwarded to geom_image().
#   na.rm          Forwarded to geom_image().
#   by             Forwarded to geom_image(). Previously this argument was
#                  accepted but silently dropped, so a caller's
#                  by = "height" had no effect.
#   ...            Any further arguments, forwarded to geom_image().
#
# Returns whatever geom_image() returns.
geom_twemoji <- function(mapping = NULL, data = NULL, inherit.aes = TRUE,
                         na.rm = FALSE, by = "width", ...) {
  geom_image(
    mapping, data,
    inherit.aes = inherit.aes,
    na.rm = na.rm,
    by = by,
    ...,
    geom = "twemoji"
  )
}
# Plot the top emoji by usage, drawing each one as its local image file
# (looked up under the project's "emoji" directory).
df.plot %>%
  ggplot(aes(value, name)) +
  # Refer to the column by its bare name inside aes(): `df.plot$...` bypasses
  # the layer's data and is flagged as discouraged by ggplot2. The layer
  # inherits its data from the plot, so no separate `data` argument is needed.
  geom_twemoji(aes(image = here::here("emoji", emoji_image))) +
  coord_flip() +
  labs(title = "@dataandme 2017 emoji-use", y = "emoji", x = "times used") +
  theme_ipsum_ps() +
  # hide the text labels on the horizontal axis
  theme(axis.text.x = element_blank())
[1] Inspired by Romain François’ recent analysis of my emoji usage.