Loading, setting up

library(tidyverse)
library(ggraph)
library(tidygraph)
library(tidytags)
library(leaflet)

Code for accessing TAGS data

# Published TAGS (Twitter Archiving Google Sheet) archive. Only referenced by
# the disabled tidytags::read_tags() call below.
u <- 'https://docs.google.com/spreadsheets/d/e/2PACX-1vTC6XLObjMuo6xd2I3Xa-56te7-dlx63QeH7-zmuH2_GtTud9eNE8k9m8togz8Fa9XI_O4eOjO7zKOb/pubhtml'

# Tweet IDs have more digits than a double can hold exactly, so read `id_str`
# as character instead of letting readr guess a numeric type. All other
# columns are still guessed.
tags_data <- read_csv(
  "tags-data.csv",
  col_types = cols(id_str = col_character())
)

# t <- tidytags::read_tags(u)

# Look up the archived tweets by ID to obtain their full metadata.
td <- pull_tweet_data(tags_data$id_str)

# Keep tweets whose text mentions the hashtag. Hashtags are not case-sensitive
# (the data contain "MathAtHome"), so the match ignores case.
tds <- td %>%
  filter(str_detect(text, regex("mathathome", ignore_case = TRUE)))

# Flatten before writing so the data frame can be stored as a plain CSV.
write_csv(rtweet::flatten(tds), "rtweet-data.csv")

Merging and then viewing data

Tweets from Google Sheets

# Tweets recovered from the manually curated Google Sheet (already saved as CSV).
d1 <- read_csv(file = "googlesheets-tweets.csv")

Tweets from TAGS

# Tweets collected through TAGS (already saved as CSV).
d2 <- read_csv(file = "rtweet-data.csv")

# Stack both sources into a single data frame, Google Sheets rows first.
# NOTE(review): the glimpse output shows `status_id` parsed as <dbl>
# (1.25e+18), so the exact IDs may not survive this round trip through CSV;
# confirm before using them as keys.
d <- bind_rows(d1, d2)

# Overview of every column and its type.
glimpse(d)

## Rows: 406
## Columns: 90
## $ user_id                 <dbl> 1093954938, 1093954938, 1093954938, 109395493…
## $ status_id               <dbl> 1.252666e+18, 1.252671e+18, 1.250458e+18, 1.2…
## $ created_at              <dttm> 2020-04-21 18:30:12, 2020-04-21 18:52:03, 20…
## $ screen_name             <chr> "MLCmath", "MLCmath", "MLCmath", "MLCmath", "…
## $ text                    <chr> "#FirstGrade Activity of the Day: Which One D…
## $ source                  <chr> "Sprout Social", "Sprout Social", "Hootsuite …
## $ display_text_width      <dbl> 260, 261, 273, 272, 273, 253, 246, 270, 251, …
## $ reply_to_status_id      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ reply_to_user_id        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ reply_to_screen_name    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ is_quote                <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ is_retweet              <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ favorite_count          <dbl> 8, 2, 4, 8, 7, 4, 6, 5, 12, 4, 5, 7, 6, 2, 12…
## $ retweet_count           <dbl> 8, 5, 4, 9, 7, 4, 6, 6, 6, 3, 8, 8, 8, 2, 14,…
## $ quote_count             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ reply_count             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ hashtags                <chr> "FirstGrade MathAtHome DistanceLearning", "Se…
## $ symbols                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ urls_url                <chr> "mathathome.mathlearningcenter.org/grade-1", …
## $ urls_t.co               <chr> "https://t.co/RUywpyfOhL", "https://t.co/1u1p…
## $ urls_expanded_url       <chr> "https://mathathome.mathlearningcenter.org/gr…
## $ media_url               <chr> "http://pbs.twimg.com/media/EWJdDHIWoAQNf1k.p…
## $ media_t.co              <chr> "https://t.co/7Akukkm9a5", "https://t.co/w4ha…
## $ media_expanded_url      <chr> "https://twitter.com/MLCmath/status/125266595…
## $ media_type              <chr> "photo", "photo", "photo", "photo", "photo", …
## $ ext_media_url           <chr> "http://pbs.twimg.com/media/EWJdDHIWoAQNf1k.p…
## $ ext_media_t.co          <chr> "https://t.co/7Akukkm9a5", "https://t.co/w4ha…
## $ ext_media_expanded_url  <chr> "https://twitter.com/MLCmath/status/125266595…
## $ ext_media_type          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ mentions_user_id        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ mentions_screen_name    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lang                    <chr> "en", "en", "en", "en", "en", "en", "en", "en…
## $ quoted_status_id        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_text             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_created_at       <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ quoted_source           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_favorite_count   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_retweet_count    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_user_id          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_screen_name      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_name             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_followers_count  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_friends_count    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_statuses_count   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_location         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_description      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ quoted_verified         <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_status_id       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_text            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_created_at      <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ retweet_source          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_favorite_count  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_retweet_count   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_user_id         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_screen_name     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_name            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_followers_count <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_friends_count   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_statuses_count  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_location        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_description     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ retweet_verified        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ place_url               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ place_name              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ place_full_name         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ place_type              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ country                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ country_code            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ geo_coords              <chr> "NA NA", "NA NA", "NA NA", "NA NA", "NA NA", …
## $ coords_coords           <chr> "NA NA", "NA NA", "NA NA", "NA NA", "NA NA", …
## $ bbox_coords             <chr> "NA NA NA NA NA NA NA NA", "NA NA NA NA NA NA…
## $ status_url              <chr> "https://twitter.com/MLCmath/status/125266595…
## $ name                    <chr> "Math Learning Center", "Math Learning Center…
## $ location                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ description             <chr> "The Math Learning Center is a nonprofit orga…
## $ url                     <chr> "http://t.co/ecNvJLHk", "http://t.co/ecNvJLHk…
## $ protected               <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ followers_count         <dbl> 4305, 4305, 4305, 4305, 4305, 4305, 4305, 430…
## $ friends_count           <dbl> 978, 978, 978, 978, 978, 978, 978, 978, 978, …
## $ listed_count            <dbl> 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 5…
## $ statuses_count          <dbl> 1575, 1575, 1575, 1575, 1575, 1575, 1575, 157…
## $ favourites_count        <dbl> 2335, 2335, 2335, 2335, 2335, 2335, 2335, 233…
## $ account_created_at      <dttm> 2013-01-16 03:11:46, 2013-01-16 03:11:46, 20…
## $ verified                <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ profile_url             <chr> "http://t.co/ecNvJLHk", "http://t.co/ecNvJLHk…
## $ profile_expanded_url    <chr> "http://www.mathlearningcenter.org", "http://…
## $ account_lang            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ profile_banner_url      <chr> "https://pbs.twimg.com/profile_banners/109395…
## $ profile_background_url  <chr> "http://abs.twimg.com/images/themes/theme2/bg…
## $ profile_image_url       <chr> "http://pbs.twimg.com/profile_images/88697277…

Number of individuals

# Tweets per account, most active accounts first.
count(d, screen_name, sort = TRUE)

## # A tibble: 228 x 2
##    screen_name         n
##    <chr>           <int>
##  1 MLCmath            44
##  2 JUMP_Math          16
##  3 DrMNikfar          11
##  4 EarlyMathEDC       10
##  5 ForestGlenGator     9
##  6 staaSTALLIONS       9
##  7 Mathcutups          7
##  8 abmath7             6
##  9 caob2018            6
## 10 TaothaSchool        6
## # … with 218 more rows

Distribution of number of tweets per individual

# Histogram of the number of tweets per account. The counts are whole numbers,
# so one-unit bins show the distribution exactly (the default of 30 bins
# splits the range unevenly and triggers ggplot2's "pick better value"
# message). Sorting beforehand has no effect on a histogram, so it is omitted.
d %>%
  count(screen_name) %>%
  ggplot(aes(x = n)) +
  geom_histogram(binwidth = 1)

Number of individuals with two or more original tweets

# Number of tweets posted by each account.
n_tweets <- count(d, screen_name)

# Profile information for every account with at least two tweets.
users <- rtweet::lookup_users(
  pull(filter(n_tweets, n >= 2), screen_name)
)

# Hand-coded professional roles, one row per coded account.
codes <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQPer3xRgn56MOIznyAOPTFvFEci47EVkIOoSYN5GuGcYiDw5t24jExKHBdoGJlRryAGxi2yFlShd42/pub?output=csv"
)

# Accounts that still need a code; an empty result means coding is complete.
users %>%
  filter(!screen_name %in% codes$screen_name) %>%
  arrange(screen_name) %>%
  select(screen_name, description)

## # A tibble: 0 x 2
## # … with 2 variables: screen_name <chr>, description <chr>

Coding for professional role

# Re-read the published coding sheet so this section can run on its own.
codes <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQPer3xRgn56MOIznyAOPTFvFEci47EVkIOoSYN5GuGcYiDw5t24jExKHBdoGJlRryAGxi2yFlShd42/pub?output=csv"
)

# Number of accounts per professional role, largest group first.
count(codes, code, sort = TRUE)

## # A tibble: 8 x 2
##   code             n
##   <chr>        <int>
## 1 teacher         12
## 2 school           9
## 3 organization     7
## 4 unclear          7
## 5 coach            6
## 6 professor        3
## 7 hashtag          1
## 8 media            1

# Among accounts flagged in `also-parent`, count how many fall in each role.
codes %>%
  filter(!is.na(`also-parent`)) %>%
  count(code, `also-parent`)

## # A tibble: 3 x 3
##   code      `also-parent`     n
##   <chr>             <dbl> <int>
## 1 professor             1     1
## 2 teacher               1     2
## 3 unclear               1     1

Prepare edgelist

# Turn the tweets into sender -> receiver interactions.
edgelist <- tidytags::create_edgelist(d)

# Number of interactions of each type, most common first.
count(edgelist, edge_type, sort = TRUE)

## # A tibble: 4 x 2
##   edge_type       n
##   <chr>       <int>
## 1 mention       319
## 2 retweet       166
## 3 quote-tweet    17
## 4 reply           7

# Interactions per sender/receiver pair, busiest pairs first.
count(edgelist, sender, receiver, sort = TRUE)

## # A tibble: 273 x 3
##    sender          receiver        n
##    <chr>           <chr>       <int>
##  1 ForestGlenGator JUMP_Math      18
##  2 caob2018        JUMP_Math      12
##  3 TaothaSchool    JUMP_Math      12
##  4 TTPowerUp1      JUMP_Math      12
##  5 STPStServices   JUMP_Math      10
##  6 DrMNikfar       HobbsTimes4     8
##  7 morley_school   JUMP_Math       8
##  8 NakodaSchool    JUMP_Math       8
##  9 StoneyEducation JUMP_Math       8
## 10 ChiefBearspaw   JUMP_Math       6
## # … with 263 more rows

# Subset of the coding sheet used downstream; `also-parent` is renamed to a
# syntactic column name.
codes_ss <- codes %>%
  select(screen_name, code, ntweets, parent = `also-parent`)

# edgelist <- tidytags::add_users_data(edgelist, codes_ss)

# One row per account that tweeted, with its role code where available.
# The join key is spelled out so that a column later added to either table
# cannot silently become part of the key. Accounts missing from the coding
# sheet are labelled "uncoded".
users <- n_tweets %>%
  left_join(codes_ss, by = "screen_name") %>%
  mutate(code = coalesce(code, "uncoded"))

users

## # A tibble: 228 x 5
##    screen_name         n code      ntweets parent
##    <chr>           <int> <chr>       <dbl>  <dbl>
##  1 __LaurenCarr        1 uncoded        NA     NA
##  2 _LarissaChan        1 uncoded        NA     NA
##  3 070180              1 uncoded        NA     NA
##  4 0rtizclassroom      1 uncoded        NA     NA
##  5 24game              1 uncoded        NA     NA
##  6 4ahealthyhabit      1 uncoded        NA     NA
##  7 AbecedaireApps      1 uncoded        NA     NA
##  8 abmath7             6 professor       5     NA
##  9 adrianmendozaed     1 uncoded        NA     NA
## 10 akhlaquequmar35     1 uncoded        NA     NA
## # … with 218 more rows

# NOTE(review): disabled draft that would attach per-account tweet counts to
# the edgelist. If it is revived, the last mutate() fills missing
# `ntweets_receiver` values from `ntweets_sender_1`; `ntweets_receiver_1`
# (created by the second join) looks like the intended column. Confirm before
# re-enabling.
# edgelist %>% 
#   left_join(rename(n_tweets, sender = screen_name, ntweets_sender_1 = n)) %>% 
#   left_join(select(n_tweets, receiver = screen_name, ntweets_receiver_1 = n)) %>% 
#   mutate(ntweets_sender = if_else(is.na(ntweets_sender), ntweets_sender_1, as.integer(ntweets_sender))) %>% 
#   mutate(ntweets_receiver = if_else(is.na(ntweets_receiver), ntweets_sender_1, as.integer(ntweets_receiver)))

# Print the edgelist as it stands (sender, receiver, edge_type).
edgelist

## # A tibble: 509 x 3
##    sender          receiver        edge_type
##    <chr>           <chr>           <chr>    
##  1 JJDLagrange     YehCathery      reply    
##  2 MTL_CS_BozziK   MTL_CS_BozziK   reply    
##  3 FirstInMath     KAndrew000      reply    
##  4 AnastasiaBetts  DigitalMasala   reply    
##  5 pelletier_t3    pelletier_t3    reply    
##  6 aprilschoenberg missmillerin5th reply    
##  7 UTEddieBrown    UTEddieBrown    reply    
##  8 iteachmathAll   lbnorris09      retweet  
##  9 iteachmathAll   Mathcutups      retweet  
## 10 iteachmathAll   Mathcutups      retweet  
## # … with 499 more rows

Create graph using tidygraph

# Keep only interactions whose sender and receiver both appear in the vertex
# table; graph_from_data_frame() needs every edge endpoint listed in `vertices`.
edgelist_ss <- edgelist %>%
  filter(sender %in% users$screen_name & receiver %in% users$screen_name)

# Recode the parent flag (non-missing = identified as a parent) to a "No"/"Yes"
# factor for plotting, then mark parents with a trailing "*". The marker is
# derived from the recoded factor: testing `parent == 1` after the recode never
# matches, so no account would ever be marked.
# NOTE: this recode assumes `parent` still holds the raw flag from the coding
# sheet; re-running it on the already recoded factor labels everyone "Yes".
users <- users %>%
  mutate(
    parent = factor(
      if_else(is.na(parent), "No", "Yes"),
      levels = c("No", "Yes")
    ),
    screen_name_addorned = if_else(
      parent == "Yes",
      str_c(screen_name, "*"),
      screen_name
    )
  )

# Build the directed graph (the first column of `users`, screen_name, becomes
# the vertex name) and store each node's in-degree as its popularity.
graph <- igraph::graph_from_data_frame(edgelist_ss, vertices = users) %>%
  as_tbl_graph() %>%
  mutate(Popularity = centrality_degree(mode = "in"))

Plot using ggraph

# Network plot: node size = in-degree, colour = professional role,
# shape = parent flag. Only nodes with more than three incoming ties are
# labelled to keep the figure readable.
# after_stat() and guides(size = "none") replace the deprecated stat() and
# guides(size = FALSE) forms; the rendered figure is unchanged.
ggraph(graph, layout = "kk") +
  geom_edge_fan(aes(alpha = after_stat(index)), show.legend = FALSE) +
  geom_node_point(aes(size = Popularity, color = code, shape = parent)) +
  geom_node_text(aes(label = ifelse(Popularity > 3.5, name, NA_character_))) +
  theme_graph() +
  scale_color_brewer("Role", type = "qual", palette = 3) +
  labs(caption = "Only those who posted one or more original tweets are included") +
  guides(size = "none")

Geocoded locations

Using the Google Maps API based on the self-reported Twitter location

# d <- d %>% 
#   mutate(location = ifelse(str_detect(location, "#"), NA, location))
# 
# geocoded_locs <- d %>% 
#   tidytags::geocode_tags()
# 
# write_rds(geocoded_locs, "geocoded-locs.rds")

# Cached geocoding results (see the disabled geocoding code above).
geocoded_locs <- read_rds("geocoded-locs.rds")

# User-level columns of the tweets, joined with each account's tweet count,
# role code and parent flag. The key is explicit so the join cannot silently
# pick up additional shared columns.
users_all <- rtweet::users_data(d) %>%
  left_join(users, by = "screen_name")

# geocoded_locs$code <- users_all$code
# geocoded_locs$parent <- users_all$parent

# example_unique_places <- dplyr::distinct(geocoded_locs, location, .keep_all = TRUE)
# example_geo_coords <- geocode_tags(example_unique_places)

# One colour per role code.
pal <- colorFactor(RColorBrewer::brewer.pal(9, "Set1"), unique(users_all$code))

# NOTE(review): marker attributes come from `users_all` and `d` by position,
# so this assumes `geocoded_locs` has one row per row of `d`, in the same
# order. Confirm against how "geocoded-locs.rds" was produced.
leaflet() %>%
  addProviderTiles("OpenStreetMap") %>%
  addCircleMarkers(
    data = geocoded_locs,
    radius = users_all$n,
    label = str_c(
      d$screen_name,
      "; Parent: ", users_all$parent,
      "; N tweets: ", users_all$n
    ),
    # A plain vector is enough here; the colours do not come from columns of
    # `geocoded_locs`, so no formula is needed.
    color = pal(users_all$code)
  ) %>%
  addLegend("bottomright", pal = pal, values = users_all$code,
            title = "Role")

Ideas for next steps

Update data: probably good to do after 5/14 (add another ~two weeks of TAGS data)
Code additional participants’ professional roles
Code for parents in a more incisive way (only used identification in profile, rather than content of tweets)

Appendix

Method section content

Data Sources

Data sources for this project include tweets containing the #mathathome Twitter hashtag. We identified tweets containing #mathathome in two ways:
- Manually identifying tweets containing #mathathome from March 20th, 2020 through April 23, 2020 (n = 160 tweets)
- Automatically collected tweets containing #mathathome from April 22 through May 21 using a Twitter Archiving Google Sheet (TAGS; Hawksey, 2020) (n = 246 tweets)

Because of the different ways we collected the data, we used the rtweet package (Kearney, 2019) for the R statistical software (R Core Team, 2020) to further process the data. The result of using rtweet to process the 406 tweets was a combined dataset with 406 tweets and 90 unique variables for each tweet (including the text of the tweet, the screen name and profile description for the individual who posted the tweet, when the tweet was posted, and other information, such as the number of times the tweet was retweeted).

Participants

The 406 tweets in the combined dataset were sent by 228 unique participants. While some participants were very active (posting more than 40 tweets in one case), on average, participating individuals posted around two tweets to #mathathome (M = 1.78, SD = 3.33).

Measures

We constructed measures for individuals’ a) professional role, b) identification as a parent (in addition to and separate from the measure for professional role), and c) geographic location, as we describe next.

Professional Role

To measure participants’ professional role, we used a coding frame developed from past research (for the professional role of participants in state-based educational Twitter hashtags; Rosenberg et al., 2016). This coding frame included codes for teachers, instructional coaches, researchers, and other roles (see Table SX). One coder applied this coding frame to the profile descriptions of all of the individuals who posted two or more tweets (as those who posted only a single tweet were likely to be less critical to our understanding of #mathathome). Of the 46 individuals who posted two or more tweets, seven (15%) could not be coded (because the individual’s profile description provided limited information or the individual’s professional role was unclear).

Identification as a Parent

In addition to the measure for professional role, we used individuals’ profile descriptions and the content of their tweets to determine whether they identified as a parent.

Geographic Location

To measure individuals’ geographic location, we geocoded individuals’ self-reported locations (from their Twitter profiles) using the mapsapi R package (Dorman, 2020). Locations could be obtained for 194 of the 320 individuals who participated in #mathathome (60%), somewhat lower than in other studies involving geocoding of Twitter users’ self-reported locations (Greenhalgh et al., 2018); for those individuals with locations, however, these locations are likely to be sufficiently accurate for understanding the country, state, or province participants are likely from.

Data Analysis

To analyze the data, we . . .

Code for accessing Google Sheets data

library(tidyverse)

# Raw text of the manually collected tweets, split into whitespace-separated
# tokens so that shortened links can be picked out.
older_data <- read_lines("older-mathathome-tweets.txt")
older_data_words <- older_data %>%
  str_split(" ") %>%
  unlist()

# Tokens containing a t.co short link. The dot is escaped and the scheme is
# required; an unescaped "t.co" also matches ordinary words such as "tacos"
# and longer domains such as "pinterest.com".
is_a_link <- str_detect(older_data_words, "https?://t\\.co/")

links <- older_data_words[is_a_link]

# Resolve every short link to its destination.
expanded_links <- longurl::expand_urls(links)

# How often each short link occurs, most frequent first.
expanded_links %>%
  count(orig_url) %>%
  arrange(desc(n))

# Keep only links that resolve to an individual tweet.
expanded_links <- expanded_links %>%
  filter(str_detect(expanded_url, "twitter\\.com/.+/status/\\d+"))

# Take the numeric status ID that follows "/status/". Taking the sixth
# "/"-separated piece instead would keep any trailing query string
# (e.g. "?s=20") and return the wrong piece for URLs with extra path segments.
statuses <- str_match(expanded_links$expanded_url, "/status/(\\d+)")[, 2]

# Namespaces are explicit because only the tidyverse is attached in this
# chunk; a bare flatten() would resolve to purrr::flatten().
gss <- tidytags::pull_tweet_data(statuses)

write_csv(rtweet::flatten(gss), "googlesheets-tweets.csv")

Analysis of #mathathome