library(here)
library(tidyverse)
library(hrbrthemes) # pretty plots
library(extrafont)
# Import the Twitter Analytics export. Every metric column gets an explicit
# parser instead of relying on readr's guesses; `tweet_id` is kept as text.
# Columns not named below are left to readr's type guessing.
the_tweets <- read_csv(
  here::here("data", "my_tweets2.csv"),
  col_types = cols(
    # identifier
    tweet_id = col_character(),
    # ratio
    engagement_rate = col_double(),
    # counts
    impressions = col_number(),
    engagements = col_number(),
    retweets = col_number(),
    replies = col_number(),
    likes = col_number(),
    follows = col_number(),
    user_profile_clicks = col_number(),
    url_clicks = col_number(),
    hashtag_clicks = col_number(),
    permalink_clicks = col_number(),
    detail_expands = col_number(),
    media_views = col_number(),
    media_engagements = col_number()
  )
)

Let’s take a look. For kicks, I’ll arrange them in descending order by engagements, since (according to Twitter) those are the most thrilling.

# Drop exact duplicate rows, then sort so the tweets with the most
# engagements come first.
the_tweets <- arrange(unique(the_tweets), desc(engagements))
# Compact column-by-column preview of the result.
the_tweets %>% glimpse()
## Observations: 11,005
## Variables: 19
## $ tweet_id            <chr> "847994181800923100", "873650237994348500"...
## $ tweet_permalink     <chr> "https://twitter.com/dataandme/status/8479...
## $ tweet_text          <chr> "Great resource, examples w/ code: “A Comp...
## $ date                <date> 2017-04-01, 2017-06-10, 2017-08-12, 2017-...
## $ time                <time> 02:09:00, 21:17:00, 16:21:00, 17:02:00, 1...
## $ impressions         <dbl> 45583, 64870, 9864, 40636, 38173, 25683, 2...
## $ engagements         <dbl> 4089, 4050, 3750, 3412, 3307, 3235, 3013, ...
## $ engagement_rate     <dbl> 0.08970450, 0.06243256, 0.38017032, 0.0839...
## $ retweets            <dbl> 243, 279, 2, 191, 171, 76, 164, 174, 105, ...
## $ replies             <dbl> 0, 9, 3, 6, 4, 2, 3, 1, 6, 16, 1, 3, 5, 3,...
## $ likes               <dbl> 454, 622, 13, 505, 584, 217, 380, 363, 317...
## $ user_profile_clicks <dbl> 240, 248, 14, 132, 167, 149, 108, 178, 109...
## $ url_clicks          <dbl> 1065, 671, 40, 737, 675, 853, 939, 692, 56...
## $ hashtag_clicks      <dbl> 61, 13, 0, 11, 28, 25, 21, 6, 0, 30, 41, 4...
## $ detail_expands      <dbl> 458, 318, 47, 171, 150, 250, 161, 535, 138...
## $ permalink_clicks    <dbl> 0, 4, 0, 0, 0, 3, 3, 0, 0, 6, 0, 0, 0, 1, ...
## $ follows             <dbl> 6, 5, 0, 2, 0, 1, 1, 6, 1, 0, 2, 0, 1, 0, ...
## $ media_views         <dbl> 1558, 1881, 3631, 1656, 1528, 1659, 1233, ...
## $ media_engagements   <dbl> 1558, 1881, 3631, 1656, 1528, 1659, 1233, ...

How about a more temporal examination?

suppressPackageStartupMessages(library(lubridate))
# Weekly tweet volume: bucket each tweet's date into its week, count the
# tweets per bucket, and draw the counts as a line.
the_tweets %>%
  mutate(week = floor_date(date, unit = "week")) %>%
  count(week) %>%
  ggplot(mapping = aes(x = week, y = n)) +
  geom_line() +
  labs(
    title = "@dataandme tweets per week, 2017",
    caption = "source: Twitter Analytics, retrieved 2017-12-31"
  ) +
  theme_ipsum_ps()

Inspired by Bob Rudis’ (hrbrmstr’s) Year In Review, let’s take a look at my tweet-length distribution.

# devtools::install_github("eclarke/ggbeeswarm")
library(ggbeeswarm)
# Character count of every tweet, jittered within its calendar month
# (`month()` comes from lubridate, attached earlier in the script).
ggplot(
  data = mutate(the_tweets, `Tweet Length` = nchar(tweet_text)),
  mapping = aes(x = month(date), y = `Tweet Length`)
) +
  # dotted reference line at 140 characters
  geom_hline(
    yintercept = 140,
    linetype = "dotted",
    size = 0.25,
    color = "#2b2b2b"
  ) +
  geom_quasirandom(
    size = 1,
    shape = 21,
    color = "slategray",
    stroke = 0.1,
    groupOnX = TRUE
  ) +
  labs(x = NULL, title = "Tweet Length Distribution") +
  theme_ipsum_rc(grid = "Y")

Let’s see what that would look like by date.

# Same tweet-length view as a per-date scatter: one point per tweet, placed
# at its posting date, with a dotted reference line at 140 characters.
ggplot(
  data = mutate(the_tweets, `Tweet Length` = nchar(tweet_text)),
  mapping = aes(x = date, y = `Tweet Length`)
) +
  geom_hline(
    yintercept = 140,
    linetype = "dotted",
    size = 0.25,
    color = "#2b2b2b"
  ) +
  geom_quasirandom(
    size = 1,
    shape = 21,
    color = "slategray",
    stroke = 0.1,
    groupOnX = TRUE
  ) +
  labs(x = NULL, title = "Tweet Length Distribution") +
  theme_ipsum_rc(grid = "Y")

How about we take a look at emoji usage with the emo package?[1]

library(emo)

# Total emoji used per day: count the emoji in each tweet, sum the counts
# within each date, then plot the daily totals as points over a loess
# smoother.
the_tweets %>%
  group_by(date) %>%
  summarise(emojis = sum(ji_count(tweet_text))) %>%
  ggplot(mapping = aes(x = date, y = emojis)) +
  geom_smooth(method = "loess", formula = y ~ x) +
  geom_point() +
  labs(title = "@dataandme daily emoji count 2017") +
  theme_ipsum_ps()

Emoji, as inspired by Romain François’ emojistory gist.

suppressPackageStartupMessages(library(magrittr))
# Tally every emoji found in the tweet text, attach its name from `emo::jis`,
# keep the ones used at least 50 times, and print one line per emoji from
# most to least used. `%$%` (magrittr) exposes the columns to `glue()`.
str_extract_all(the_tweets$tweet_text, emo::ji_rx) %>%
  flatten_chr() %>%
  table() %>%
  enframe(name = "emoji") %>%
  left_join(emo::jis, by = "emoji") %>%
  filter(value >= 50) %>%
  arrange(desc(value)) %>%
  select(emoji, name, value) %$%
  writeLines(glue::glue("{emoji}  ({name}) - {value} times"))
## 😂  (face with tears of joy) - 413 times
## 📦  (package) - 299 times
## 👍  (thumbs up) - 298 times
## 😉  (winking face) - 209 times
## 😜  (winking face with tongue) - 145 times
## 😳  (flushed face) - 130 times
## 🔥  (fire) - 128 times
## 💻  (laptop computer) - 111 times
## 🙌  (raising hands) - 110 times
## 🤔  (thinking face) - 103 times
## 🤓  (nerd face) - 102 times
## 🏀  (basketball) - 91 times
## 🌟  (glowing star) - 88 times
## 😱  (face screaming in fear) - 84 times
## 👀  (eyes) - 81 times
## 😍  (smiling face with heart-eyes) - 80 times
## 📝  (memo) - 79 times
## 😵  (dizzy face) - 79 times
## ♀️  (female sign) - 74 times
## 🎉  (party popper) - 74 times
## 👩  (woman) - 66 times
## 🙄  (face with rolling eyes) - 63 times
## ✨  (sparkles) - 60 times
## 🤖  (robot face) - 57 times
## 🐦  (bird) - 56 times
## 💥  (collision) - 56 times
## 👨  (man) - 53 times
## 🖤  (black heart) - 53 times
## 📖  (open book) - 52 times
## 🏆  (trophy) - 50 times

In addition to pulling the emoji out of the tweets, I’ll join the emo::jis data frame to get emoji names, and transform a few variables to prepare to use custom images.

library(stringr)
# Emoji frequency table, one row per distinct emoji, most used first.
# Columns added on top of the `emo::jis` join:
#   code        - lower-cased `runes`
#   emoji_name  - `name` with spaces replaced by hyphens
#   name        - converted to a factor whose levels follow descending `value`
#   emoji_image - image file name, "<emoji_name>_<code>.png"
#   emoji_url   - `emoji_image` appended to the emojipedia thumbnail base URL
tweet_emoji <- the_tweets %>%
  pull(tweet_text) %>%
  str_extract_all(emo::ji_rx) %>%
  flatten_chr() %>%
  table() %>%
  enframe(name = "emoji") %>%
  left_join(emo::jis, by = "emoji") %>%
  mutate(
    code = tolower(runes),
    emoji_name = gsub(" ", "-", name)
  ) %>%
  arrange(desc(value)) %>%
  mutate(
    # emoji as factor
    name = factor(name, levels = name[order(-value)]),
    emoji_image = paste0(emoji_name, "_", code, ".png"),
    emoji_url = paste0(
      "https://emojipedia-us.s3.amazonaws.com/thumbs/240/apple/118/",
      emoji_image
    )
  )

Now for some fun, taken from the kickin’ PRISMOJI post “Emoji data science in R”.

# Keep the 15 rows of `tweet_emoji` ranked highest by `value`.
df.plot <- tweet_emoji %>% top_n(15, value)
# One raster grob per image URL.
# NOTE(review): `g` is not referenced by the plotting code visible below, and
# rasterGrob() is handed the URL strings themselves rather than downloaded
# image data - confirm this is intended.
g <- lapply(df.plot[["emoji_url"]], grid::rasterGrob)

library(ggimage)
# Thin wrapper around ggimage::geom_image() used to draw emoji images.
#
# Args:
#   mapping, data, inherit.aes, na.rm: forwarded unchanged to geom_image().
#   by: forwarded to geom_image(); previously accepted here but silently
#     dropped, so a caller's `by = "height"` had no effect.
#   ...: any further arguments, forwarded to geom_image().
#
# Returns: whatever geom_image() returns.
#
# NOTE(review): `geom = "twemoji"` is passed straight through to geom_image();
# whether the installed ggimage version acts on it is not visible here.
geom_twemoji <- function(mapping = NULL, data = NULL, inherit.aes = TRUE,
                         na.rm = FALSE, by = "width", ...) {
  geom_image(
    mapping, data,
    inherit.aes = inherit.aes,
    na.rm = na.rm,
    by = by,
    ...,
    geom = "twemoji"
  )
}


# Plot the top emoji with each point drawn as that emoji's image, read from
# the project's local "emoji" directory. `coord_flip()` swaps the axes, and
# the text on the resulting horizontal axis is hidden.
#
# The image path is built from the `emoji_image` column through the aes()
# data mask rather than via `df.plot$emoji_image`: `$` inside aes() bypasses
# the layer data and triggers a ggplot2 warning. The layer inherits `df.plot`
# from the plot, so no separate `data =` argument is needed.
df.plot %>%
  ggplot(aes(value, name)) +
  geom_twemoji(aes(image = here::here("emoji", emoji_image))) +
  coord_flip() +
  labs(title = "@dataandme 2017 emoji-use", y = "emoji", x = "times used") +
  theme_ipsum_ps() +
  theme(axis.text.x = element_blank())


  1. Inspired by Romain François’ recent analysis of my emoji usage.