Loading, setting up

library(tidyverse)
library(ggraph)
library(tidygraph)
library(tidytags)
library(leaflet)

Merging and then viewing data

Tweets from Google Sheets

# Tweets from the Google Sheets source, read from a local CSV file.
d1 <- read_csv(file = "googlesheets-tweets.csv")

Tweets from TAGS

# Tweets from the TAGS source, read from a local CSV file.
d2 <- read_csv(file = "rtweet-data.csv")

# Stack both sources into one data frame.
d <- bind_rows(d1, d2)

# Show every column with its type and first few values.
glimpse(d)
## Rows: 320
## Columns: 90
## $ user_id                 <dbl> 1093954938, 1093954938, 1093954938, 109395493…
## $ status_id               <dbl> 1.252666e+18, 1.252671e+18, 1.250458e+18, 1.2…
## $ created_at              <dttm> 2020-04-21 18:30:12, 2020-04-21 18:52:03, 20…
## $ screen_name             <chr> "MLCmath", "MLCmath", "MLCmath", "MLCmath", "…
## $ text                    <chr> "#FirstGrade Activity of the Day: Which One D…
## $ source                  <chr> "Sprout Social", "Sprout Social", "Hootsuite …
## $ display_text_width      <dbl> 260, 261, 273, 272, 273, 253, 246, 270, 251, …
## $ reply_to_status_id      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ reply_to_user_id        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ reply_to_screen_name    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ is_quote                <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ is_retweet              <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ favorite_count          <dbl> 8, 2, 4, 8, 7, 4, 6, 5, 12, 4, 5, 7, 6, 2, 12…
## $ retweet_count           <dbl> 8, 5, 4, 9, 7, 4, 6, 6, 6, 3, 8, 8, 8, 2, 14,…
## $ quote_count             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ reply_count             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ hashtags                <chr> "FirstGrade MathAtHome DistanceLearning", "Se…
## $ symbols                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ urls_url                <chr> "mathathome.mathlearningcenter.org/grade-1", …
## $ urls_t.co               <chr> "https://t.co/RUywpyfOhL", "https://t.co/1u1p…
## $ urls_expanded_url       <chr> "https://mathathome.mathlearningcenter.org/gr…
## $ media_url               <chr> "http://pbs.twimg.com/media/EWJdDHIWoAQNf1k.p…
## $ media_t.co              <chr> "https://t.co/7Akukkm9a5", "https://t.co/w4ha…
## $ media_expanded_url      <chr> "https://twitter.com/MLCmath/status/125266595…
## $ media_type              <chr> "photo", "photo", "photo", "photo", "photo", …
## $ ext_media_url           <chr> "http://pbs.twimg.com/media/EWJdDHIWoAQNf1k.p…
## $ ext_media_t.co          <chr> "https://t.co/7Akukkm9a5", "https://t.co/w4ha…
## $ ext_media_expanded_url  <chr> "https://twitter.com/MLCmath/status/125266595…
## $ ext_media_type          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ mentions_user_id        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ mentions_screen_name    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lang                    <chr> "en", "en", "en", "en", "en", "en", "en", "en…
## $ quoted_status_id        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_text             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_created_at       <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ quoted_source           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_favorite_count   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_retweet_count    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_user_id          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_screen_name      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_name             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_followers_count  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_friends_count    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_statuses_count   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_location         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_description      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_verified         <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_status_id       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_text            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_created_at      <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ retweet_source          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_favorite_count  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_retweet_count   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_user_id         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_screen_name     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_name            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_followers_count <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_friends_count   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_statuses_count  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_location        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_description     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_verified        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ place_url               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ place_name              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ place_full_name         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ place_type              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ country                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ country_code            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ geo_coords              <chr> "NA NA", "NA NA", "NA NA", "NA NA", "NA NA", …
## $ coords_coords           <chr> "NA NA", "NA NA", "NA NA", "NA NA", "NA NA", …
## $ bbox_coords             <chr> "NA NA NA NA NA NA NA NA", "NA NA NA NA NA NA…
## $ status_url              <chr> "https://twitter.com/MLCmath/status/125266595…
## $ name                    <chr> "Math Learning Center", "Math Learning Center…
## $ location                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ description             <chr> "The Math Learning Center is a nonprofit orga…
## $ url                     <chr> "http://t.co/ecNvJLHk", "http://t.co/ecNvJLHk…
## $ protected               <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ followers_count         <dbl> 4305, 4305, 4305, 4305, 4305, 4305, 4305, 430…
## $ friends_count           <dbl> 978, 978, 978, 978, 978, 978, 978, 978, 978, …
## $ listed_count            <dbl> 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 5…
## $ statuses_count          <dbl> 1575, 1575, 1575, 1575, 1575, 1575, 1575, 157…
## $ favourites_count        <dbl> 2335, 2335, 2335, 2335, 2335, 2335, 2335, 233…
## $ account_created_at      <dttm> 2013-01-16 03:11:46, 2013-01-16 03:11:46, 20…
## $ verified                <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ profile_url             <chr> "http://t.co/ecNvJLHk", "http://t.co/ecNvJLHk…
## $ profile_expanded_url    <chr> "http://www.mathlearningcenter.org", "http://…
## $ account_lang            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ profile_banner_url      <chr> "https://pbs.twimg.com/profile_banners/109395…
## $ profile_background_url  <chr> "http://abs.twimg.com/images/themes/theme2/bg…
## $ profile_image_url       <chr> "http://pbs.twimg.com/profile_images/88697277…

Number of individuals

# Tweets per account, largest counts first.
count(d, screen_name, sort = TRUE)
## # A tibble: 187 x 2
##    screen_name         n
##    <chr>           <int>
##  1 MLCmath            43
##  2 JUMP_Math          13
##  3 DrMNikfar           9
##  4 staaSTALLIONS       9
##  5 ForestGlenGator     6
##  6 abmath7             5
##  7 Mathcutups          5
##  8 caob2018            4
##  9 Inv3_Math           4
## 10 iteachmathAll       4
## # … with 177 more rows

Distribution of number of tweets per individual

# Distribution of tweets per account. The counts are whole numbers, so a bin
# width of 1 gives every count its own bar; the default of 30 bins would merge
# some neighbouring counts but not others. No sorting is done beforehand
# because row order has no effect on a histogram.
d %>%
  count(screen_name) %>%
  ggplot(aes(x = n)) +
  geom_histogram(binwidth = 1)

Number of individuals with two or more original tweets

# Tweets per account, stored for later reuse.
n_tweets <- count(d, screen_name)

# Accounts that appear at least twice.
filter(n_tweets, n >= 2)
## # A tibble: 40 x 2
##    screen_name       n
##    <chr>         <int>
##  1 abmath7           5
##  2 anujgsj           2
##  3 bmeyring          2
##  4 caob2018          4
##  5 ChiefBearspaw     2
##  6 DeniseGB55        2
##  7 DrMNikfar         9
##  8 EarlyMathEDC      3
##  9 eriksonmath       2
## 10 eschoolnews       2
## # … with 30 more rows

Coding for professional role

# Role codes, read from a Google Sheets document published as CSV.
codes <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQPer3xRgn56MOIznyAOPTFvFEci47EVkIOoSYN5GuGcYiDw5t24jExKHBdoGJlRryAGxi2yFlShd42/pub?output=csv"
)

# Number of coded accounts per role, most common role first.
count(codes, code, sort = TRUE)
## # A tibble: 8 x 2
##   code             n
##   <chr>        <int>
## 1 teacher         10
## 2 school           9
## 3 organization     7
## 4 unclear          5
## 5 coach            4
## 6 professor        3
## 7 hashtag          1
## 8 media            1
# Per role, the number of accounts that have a value in `also-parent`.
# Rows without a value are dropped first, which yields the same table as
# counting everything and discarding the NA groups afterwards.
codes %>%
  filter(!is.na(`also-parent`)) %>%
  count(code, `also-parent`)
## # A tibble: 3 x 3
##   code      `also-parent`     n
##   <chr>             <dbl> <int>
## 1 professor             1     1
## 2 teacher               1     2
## 3 unclear               1     1

Prepare edgelist

# Build the edgelist from the combined tweets.
edgelist <- tidytags::create_edgelist(d)

# Number of edges per edge type, most frequent first.
count(edgelist, edge_type, sort = TRUE)
## # A tibble: 4 x 2
##   edge_type       n
##   <chr>       <int>
## 1 mention       229
## 2 retweet       105
## 3 quote-tweet    15
## 4 reply           6
# Number of edges per sender/receiver pair, most frequent pairs first.
count(edgelist, sender, receiver, sort = TRUE)
## # A tibble: 210 x 3
##    sender          receiver            n
##    <chr>           <chr>           <int>
##  1 ForestGlenGator JUMP_Math          12
##  2 caob2018        JUMP_Math           8
##  3 DrMNikfar       HobbsTimes4         8
##  4 TaothaSchool    JUMP_Math           8
##  5 TTPowerUp1      JUMP_Math           8
##  6 anujgsj         teacheranand        4
##  7 bmeyring        teachersam_dmti     4
##  8 ChiefBearspaw   JUMP_Math           4
##  9 DrMNikfar       Krit_Popke          4
## 10 indranil2kin    teacheranand        4
## # … with 200 more rows
# Keep only the coding columns used below; `also-parent` is renamed to
# `parent` so it can be referenced without backticks.
codes_ss <- codes %>%
  select(screen_name, code, ntweets, parent = `also-parent`)

# edgelist <- tidytags::add_users_data(edgelist, codes_ss)

# Attach the codes to every account. The join key is spelled out so the join
# cannot silently start matching on other shared columns, and no
# "Joining, by = ..." message is emitted. Accounts that have no code get the
# label "uncoded".
users <- n_tweets %>%
  left_join(codes_ss, by = "screen_name") %>%
  mutate(code = coalesce(code, "uncoded"))

users
## # A tibble: 187 x 5
##    screen_name         n code      ntweets parent
##    <chr>           <int> <chr>       <dbl>  <dbl>
##  1 _LarissaChan        1 uncoded        NA     NA
##  2 070180              1 uncoded        NA     NA
##  3 0rtizclassroom      1 uncoded        NA     NA
##  4 24game              1 uncoded        NA     NA
##  5 4ahealthyhabit      1 uncoded        NA     NA
##  6 AbecedaireApps      1 uncoded        NA     NA
##  7 abmath7             5 professor       5     NA
##  8 adrianmendozaed     1 uncoded        NA     NA
##  9 akhlaquequmar35     1 uncoded        NA     NA
## 10 anshtripathi029     1 uncoded        NA     NA
## # … with 177 more rows
# Disabled draft: joins `n_tweets` onto the edgelist twice (once keyed as
# sender, once as receiver) and uses those counts to fill missing
# `ntweets_sender` / `ntweets_receiver` values.
# NOTE(review): the last mutate() falls back to `ntweets_sender_1` for the
# receiver column; `ntweets_receiver_1` looks like the intended source.
# Confirm before re-enabling.
# edgelist %>% 
#   left_join(rename(n_tweets, sender = screen_name, ntweets_sender_1 = n)) %>% 
#   left_join(select(n_tweets, receiver = screen_name, ntweets_receiver_1 = n)) %>% 
#   mutate(ntweets_sender = if_else(is.na(ntweets_sender), ntweets_sender_1, as.integer(ntweets_sender))) %>% 
#   mutate(ntweets_receiver = if_else(is.na(ntweets_receiver), ntweets_sender_1, as.integer(ntweets_receiver)))

# Print the edgelist as it currently stands.
edgelist
## # A tibble: 355 x 3
##    sender          receiver        edge_type
##    <chr>           <chr>           <chr>    
##  1 JJDLagrange     YehCathery      reply    
##  2 MTL_CS_BozziK   MTL_CS_BozziK   reply    
##  3 FirstInMath     KAndrew000      reply    
##  4 pelletier_t3    pelletier_t3    reply    
##  5 aprilschoenberg missmillerin5th reply    
##  6 UTEddieBrown    UTEddieBrown    reply    
##  7 iteachmathAll   Mathcutups      retweet  
##  8 iteachmathAll   HobbsTimes4     retweet  
##  9 iteachmathAll   Krit_Popke      retweet  
## 10 iteachmathAll   Mathcutups      retweet  
## # … with 345 more rows

Create graph using tidygraph

# Keep only edges whose sender and receiver both appear in `users`, so every
# edge refers to a row of the vertex table passed to igraph below.
edgelist_ss <- edgelist %>%
  filter(sender %in% users$screen_name & receiver %in% users$screen_name)

# Recode the parent flag into a No/Yes factor (NA becomes "No").
users <- mutate(users, parent = as.factor(ifelse(is.na(parent), "No", "Yes")))

# Append an asterisk to the screen names of parents. `parent` is a No/Yes
# factor at this point, so it must be compared with "Yes"; a comparison with
# the number 1 is never TRUE and would leave every name unmarked.
users$screen_name_addorned <- ifelse(
  users$parent == "Yes",
  str_c(users$screen_name, "*"),
  users$screen_name
)

# Build the graph and store each node's in-degree as `Popularity`.
graph <- igraph::graph_from_data_frame(edgelist_ss, vertices = users) %>%
  as_tbl_graph() %>%
  mutate(Popularity = centrality_degree(mode = "in"))

Plot using ggraph

# Network plot: node size follows Popularity, colour follows the role code and
# shape follows the parent flag. Only nodes with Popularity above 3.5 get a
# text label.
ggraph(graph, layout = "kk") +
  # after_stat() is the current way to refer to a computed variable; stat()
  # is deprecated in ggplot2.
  geom_edge_fan(aes(alpha = after_stat(index)), show.legend = FALSE) +
  geom_node_point(aes(size = Popularity, color = code, shape = parent)) +
  geom_node_text(aes(label = ifelse(Popularity > 3.5, name, NA))) +
  theme_graph() +
  scale_color_brewer("Role", type = "qual", palette = 3) +
  labs(caption = "Only those who posted one or more original tweets are included") +
  # "none" hides the size legend; passing FALSE here is deprecated.
  guides(size = "none")

Geocoded locations

Using the Google Maps API based on the self-reported Twitter location

# Disabled: one-off geocoding run whose result was cached in
# "geocoded-locs.rds" (read back below).
# d <- d %>% 
#   mutate(location = ifelse(str_detect(location, "#"), NA, location))
# 
# geocoded_locs <- d %>% 
#   tidytags::geocode_tags()
# 
# write_rds(geocoded_locs, "geocoded-locs.rds")

geocoded_locs <- read_rds("geocoded-locs.rds")

# User-level columns of the tweets, with tweet count, role code and parent
# flag attached. The join key is explicit so the match cannot drift to other
# shared columns.
users_all <- rtweet::users_data(d) %>%
  left_join(users, by = "screen_name")

# geocoded_locs$code <- users_all$code
# geocoded_locs$parent <- users_all$parent

# example_unique_places <- dplyr::distinct(geocoded_locs, location, .keep_all = TRUE)
# example_geo_coords <- geocode_tags(example_unique_places)

# One colour per role code.
pal <- colorFactor(RColorBrewer::brewer.pal(9, "Set1"), unique(users_all$code))

# NOTE(review): the marker attributes are taken from `users_all` by position,
# which assumes `geocoded_locs` has one row per row of `users_all` in the same
# order. TODO confirm.
# The label reads the screen name from `users_all` as well, so all parts of a
# label come from the same table.
leaflet() %>%
  addProviderTiles("OpenStreetMap") %>%
  addCircleMarkers(
    data = geocoded_locs,
    radius = users_all$n,
    label = str_c(
      users_all$screen_name,
      "; Parent: ", users_all$parent,
      "; N tweets: ", users_all$n
    ),
    color = ~pal(users_all$code)
  ) %>%
  addLegend("bottomright", pal = pal, values = users_all$code, title = "Role")

Ideas for next steps

Appendix

Code for accessing Google Sheets data

library(tidyverse)

# Read the text file of older tweets and split every line on single spaces.
older_data <- read_lines("older-mathathome-tweets.txt")
older_data_words <- older_data %>%
  str_split(" ") %>%
  unlist()

# Keep the pieces that contain the literal text "t.co". fixed() stops the dot
# from acting as a regex wildcard that matches any character.
is_a_link <- str_detect(older_data_words, fixed("t.co"))
links <- older_data_words[is_a_link]

expanded_links <- longurl::expand_urls(links)

# How often each original (short) URL occurs, most frequent first.
expanded_links %>%
  count(orig_url) %>%
  arrange(desc(n))

# Keep only links that expand to a URL containing the literal "twitter.com".
expanded_links <- expanded_links %>%
  filter(str_detect(expanded_url, fixed("twitter.com")))

# Take the sixth "/"-separated piece of each URL; for URLs shaped like
# https://twitter.com/<user>/status/<id> that piece is the status ID.
statuses <- str_split(expanded_links$expanded_url, "/") %>%
  map_chr(~ .[6])

# Namespace-qualified because only tidyverse is attached in this snippet.
gss <- tidytags::pull_tweet_data(statuses)

# rtweet::flatten() is named explicitly: with tidyverse attached, a bare
# flatten() can resolve to purrr::flatten(), which is a different function.
write_csv(rtweet::flatten(gss), "googlesheets-tweets.csv")

Code for accessing TAGS data

library(tidyverse)
library(tidytags)

# Read the TAGS tracker sheet, then pull tweet data for the IDs it lists.
u <- 'https://docs.google.com/spreadsheets/d/1JnPAcgkx_lYnmFVj4c6vyuSgteAsG3yruZne3Nj89DE/edit?usp=sharing'
t <- tidytags::read_tags(u)
td <- tidytags::pull_tweet_data(t$id_str)

# Keep tweets whose text contains "mathathome".
# NOTE(review): the match is case-sensitive, so text that only contains a
# differently cased form such as "MathAtHome" is dropped. Confirm this is
# intended.
tds <- td %>%
  filter(str_detect(text, "mathathome"))

# rtweet::flatten() is named explicitly: with tidyverse attached, a bare
# flatten() can resolve to purrr::flatten(), which is a different function.
write_csv(rtweet::flatten(tds), "rtweet-data.csv")