library(tidyverse)
library(ggraph)
library(tidygraph)
library(tidytags)
library(leaflet)
# Read the tweet data from googlesheets-tweets.csv.
# NOTE(review): the result is stored as `d1`, but every step below reads `d`,
# which is not defined in this part of the script -- confirm where `d` is built.
d1 <- read_csv(file = "googlesheets-tweets.csv")

# Number of tweets per account, most active accounts first.
d %>%
  count(screen_name, sort = TRUE)
## # A tibble: 228 x 2
## screen_name n
## <chr> <int>
## 1 MLCmath 44
## 2 JUMP_Math 16
## 3 DrMNikfar 11
## 4 EarlyMathEDC 10
## 5 ForestGlenGator 9
## 6 staaSTALLIONS 9
## 7 Mathcutups 7
## 8 abmath7 6
## 9 caob2018 6
## 10 TaothaSchool 6
## # … with 218 more rows
# Histogram of the number of tweets posted per account.
ggplot(
  data = arrange(count(d, screen_name), desc(n)),
  mapping = aes(x = n)
) +
  geom_histogram()
# Tweets per account; reused by the joins further down.
n_tweets <- count(d, screen_name)

# Fetch user records (via rtweet) for accounts with at least two tweets.
users <- rtweet::lookup_users(
  pull(filter(n_tweets, n >= 2), screen_name)
)
# Read the coding sheet published as CSV.
codes <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQPer3xRgn56MOIznyAOPTFvFEci47EVkIOoSYN5GuGcYiDw5t24jExKHBdoGJlRryAGxi2yFlShd42/pub?output=csv"
)

# List looked-up users that do not yet appear in the coding sheet, with the
# profile description needed to code them.
users %>%
  filter(!screen_name %in% codes$screen_name) %>%
  arrange(screen_name) %>%
  select(screen_name, description)
## # A tibble: 0 x 2
## # … with 2 variables: screen_name <chr>, description <chr>
# Read the coding sheet again (same published URL as the earlier read).
codes <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQPer3xRgn56MOIznyAOPTFvFEci47EVkIOoSYN5GuGcYiDw5t24jExKHBdoGJlRryAGxi2yFlShd42/pub?output=csv"
)

# How many accounts received each code, most frequent code first.
count(codes, code, sort = TRUE)
## # A tibble: 8 x 2
## code n
## <chr> <int>
## 1 teacher 12
## 2 school 9
## 3 organization 7
## 4 unclear 7
## 5 coach 6
## 6 professor 3
## 7 hashtag 1
## 8 media 1
# Accounts flagged in the `also-parent` column, broken down by code.
codes %>%
  filter(!is.na(`also-parent`)) %>%
  count(code, `also-parent`)
## # A tibble: 3 x 3
## code `also-parent` n
## <chr> <dbl> <int>
## 1 professor 1 1
## 2 teacher 1 2
## 3 unclear 1 1
# Build the sender/receiver edgelist from the tweets with tidytags.
edgelist <- tidytags::create_edgelist(d)

# Number of edges of each type, most common type first.
edgelist %>%
  count(edge_type, sort = TRUE)
## # A tibble: 4 x 2
## edge_type n
## <chr> <int>
## 1 mention 319
## 2 retweet 166
## 3 quote-tweet 17
## 4 reply 7
# Number of edges per sender/receiver pair, most frequent pair first.
count(edgelist, sender, receiver, sort = TRUE)
## # A tibble: 273 x 3
## sender receiver n
## <chr> <chr> <int>
## 1 ForestGlenGator JUMP_Math 18
## 2 caob2018 JUMP_Math 12
## 3 TaothaSchool JUMP_Math 12
## 4 TTPowerUp1 JUMP_Math 12
## 5 STPStServices JUMP_Math 10
## 6 DrMNikfar HobbsTimes4 8
## 7 morley_school JUMP_Math 8
## 8 NakodaSchool JUMP_Math 8
## 9 StoneyEducation JUMP_Math 8
## 10 ChiefBearspaw JUMP_Math 6
## # … with 263 more rows
# Keep only the coding-sheet columns needed downstream; `also-parent` is
# renamed to the syntactic name `parent`.
codes_ss <- select(codes, screen_name, code, ntweets, parent = `also-parent`)
# edgelist <- tidytags::add_users_data(edgelist, codes_ss)

# One row per account: tweet count plus the code, where one was assigned.
# The join key is spelled out so that a column added to the coding sheet can
# never silently become part of the key.
users <- n_tweets %>%
  left_join(codes_ss, by = "screen_name") %>%
  # Accounts missing from the coding sheet get an explicit "uncoded" label.
  mutate(code = coalesce(code, "uncoded"))

users
## # A tibble: 228 x 5
## screen_name n code ntweets parent
## <chr> <int> <chr> <dbl> <dbl>
## 1 __LaurenCarr 1 uncoded NA NA
## 2 _LarissaChan 1 uncoded NA NA
## 3 070180 1 uncoded NA NA
## 4 0rtizclassroom 1 uncoded NA NA
## 5 24game 1 uncoded NA NA
## 6 4ahealthyhabit 1 uncoded NA NA
## 7 AbecedaireApps 1 uncoded NA NA
## 8 abmath7 6 professor 5 NA
## 9 adrianmendozaed 1 uncoded NA NA
## 10 akhlaquequmar35 1 uncoded NA NA
## # … with 218 more rows
# Disabled: joins the per-user tweet counts from n_tweets onto the edgelist and
# uses them to fill missing ntweets_sender / ntweets_receiver values.
# NOTE(review): the last line fills ntweets_receiver from ntweets_sender_1;
# ntweets_receiver_1 looks like the intended column -- confirm before re-enabling.
# edgelist %>%
# left_join(rename(n_tweets, sender = screen_name, ntweets_sender_1 = n)) %>%
# left_join(select(n_tweets, receiver = screen_name, ntweets_receiver_1 = n)) %>%
# mutate(ntweets_sender = if_else(is.na(ntweets_sender), ntweets_sender_1, as.integer(ntweets_sender))) %>%
# mutate(ntweets_receiver = if_else(is.na(ntweets_receiver), ntweets_sender_1, as.integer(ntweets_receiver)))
# Print the edgelist as it stands.
edgelist
## # A tibble: 509 x 3
## sender receiver edge_type
## <chr> <chr> <chr>
## 1 JJDLagrange YehCathery reply
## 2 MTL_CS_BozziK MTL_CS_BozziK reply
## 3 FirstInMath KAndrew000 reply
## 4 AnastasiaBetts DigitalMasala reply
## 5 pelletier_t3 pelletier_t3 reply
## 6 aprilschoenberg missmillerin5th reply
## 7 UTEddieBrown UTEddieBrown reply
## 8 iteachmathAll lbnorris09 retweet
## 9 iteachmathAll Mathcutups retweet
## 10 iteachmathAll Mathcutups retweet
## # … with 499 more rows
# Keep only edges whose sender and receiver both appear in the user table.
edgelist_ss <- edgelist %>%
  filter(sender %in% users$screen_name, receiver %in% users$screen_name)

# Recode the parent flag (missing vs. present) as a "No"/"Yes" factor, then
# build a display name that marks parents with a trailing "*".
# The comparison must be against "Yes": once `parent` has been recoded it never
# equals 1, so a test against 1 would leave every name unmarked.
users <- users %>%
  mutate(
    parent = factor(if_else(is.na(parent), "No", "Yes")),
    screen_name_addorned = if_else(
      parent == "Yes",
      str_c(screen_name, "*"),
      screen_name
    )
  )
# Build the network from the filtered edgelist, with `users` as vertex
# attributes, and add each node's in-degree as `Popularity`.
graph <- igraph::graph_from_data_frame(edgelist_ss, vertices = users) %>%
  as_tbl_graph() %>%
  mutate(Popularity = centrality_degree(mode = "in"))

# Network plot: node size = Popularity, colour = code, shape = parent flag.
# after_stat() and guides(size = "none") replace the deprecated stat() and
# guides(size = FALSE) forms.
ggraph(graph, layout = "kk") +
  geom_edge_fan(aes(alpha = after_stat(index)), show.legend = FALSE) +
  geom_node_point(aes(size = Popularity, color = code, shape = parent)) +
  # Label only the nodes whose Popularity exceeds 3.5.
  geom_node_text(aes(label = ifelse(Popularity > 3.5, name, NA))) +
  theme_graph() +
  scale_color_brewer("Role", type = "qual", palette = 3) +
  labs(caption = "Only those who posted one or more original tweets are included") +
  guides(size = "none")
Using the Google Maps API based on the self-reported Twitter location
# d <- d %>%
# mutate(location = ifelse(str_detect(location, "#"), NA, location))
#
# geocoded_locs <- d %>%
# tidytags::geocode_tags()
#
# write_rds(geocoded_locs, "geocoded-locs.rds")
# Load the cached geocoding result instead of re-running the geocoder.
geocoded_locs <- read_rds(file = "geocoded-locs.rds")

# User-level columns extracted from the tweets, extended with the columns of
# `users` (joined on whatever column names the two tables share).
users_all <- left_join(rtweet::users_data(d), users)
# geocoded_locs$code <- users_all$code
# geocoded_locs$parent <- users_all$parent
# example_unique_places <- dplyr::distinct(geocoded_locs, location, .keep_all = TRUE)
# example_geo_coords <- geocode_tags(example_unique_places)
# Colour palette keyed on the values of `code`.
pal <- colorFactor(
  palette = RColorBrewer::brewer.pal(9, "Set1"),
  domain = unique(users_all$code)
)

# Map of the geocoded locations on OpenStreetMap tiles.
# NOTE(review): radius, label and colour are taken from `users_all` and `d` by
# position, so this assumes geocoded_locs has one row per row of those tables,
# in the same order -- confirm.
leaflet() %>%
  addProviderTiles(provider = "OpenStreetMap") %>%
  addCircleMarkers(
    data = geocoded_locs,
    radius = users_all$n,
    label = str_c(
      d$screen_name,
      "; Parent: ", users_all$parent,
      "; N tweets: ", users_all$n
    ),
    color = ~pal(users_all$code)
  ) %>%
  addLegend(
    position = "bottomright",
    pal = pal,
    values = users_all$code,
    title = "Role"
  )
Data sources for this project include tweets containing the #mathathome Twitter hashtag. We identified tweets containing #mathathome in two ways:
- Manually identifying tweets containing #mathathome from March 20th, 2020 through April 23, 2020 (n = 160 tweets)
- Automatically collecting tweets containing #mathathome from April 22 through May 21 using a Twitter Archiving Google Sheet (TAGS; Hawksey, 2020) (n = 246 tweets)
Because of the different ways we collected the data, we used the rtweet package (Kearney, 2019) for the R statistical software (R Core Team, 2020) to further process the data. The result of using rtweet to process the 406 tweets was a combined dataset with 406 tweets and 90 unique variables for each tweet (including the text of the tweet, the screen name and profile description for the individual who posted the tweet, when the tweet was posted, and other information, such as the number of times the tweet was retweeted).
The 406 tweets in the combined dataset were sent by 228 unique participants. While some participants were very active (posting more than 40 tweets in one case), on average, participating individuals posted around two tweets to #mathathome (M = 1.78, SD = 3.33).
We constructed measures for individuals’ a) professional role, b) identification as a parent (in addition to and separate from the measure for professional role), and c) geographic location, as we describe next.
To measure participants’ professional role, we used a coding frame developed from past research (for the professional role of participants in state-based educational Twitter hashtags; Rosenberg et al., 2016). This coding frame included codes for teachers, instructional coaches, researchers, and other roles (see Table SX). One coder applied this coding frame to the profile descriptions of all of the individuals who posted two or more tweets (as those who posted only a single tweet were likely to be less critical to our understanding of #mathathome). Of the 46 individuals who posted two or more tweets, seven (15%) were unable to be coded (because the individual’s profile description provided limited information or the individual’s professional role was unclear).
In addition to the measure for professional role, we used individuals’ profile descriptions and the content of their tweets to determine whether they identified as a parent.
To measure individuals’ geographic location, we geocoded individuals’ self-reported locations (from their Twitter profiles) using the mapsapi R package (Dorman, 2020). Locations were able to be obtained for 194 of the 320 individuals who participated in #mathathome (60%), somewhat lower than in other studies involving geocoding of Twitter users’ self-reported locations (Greenhalgh et al., 2018); for those individuals with locations, however, these locations are likely to be sufficiently accurate for understanding the country, state, or province participants are likely from.
To analyze the data, we . . .
library(tidyverse)
# Read the older tweets, one element per line of the text file.
older_data <- read_lines(file = "older-mathathome-tweets.txt")

# Break every line on single spaces and pool the pieces into one vector.
older_data_words <- unlist(str_split(older_data, pattern = " "))
# Flag the words that contain a t.co short link. The pattern is matched
# literally with fixed(): as a regular expression the "." in "t.co" matches
# any character, so ordinary words such as "tacos" would be flagged too.
is_a_link <- str_detect(older_data_words, fixed("t.co"))
links <- older_data_words[is_a_link]

# Resolve the short links to the URLs they point to.
expanded_links <- longurl::expand_urls(links)

# How often each short link occurs, most frequent first.
expanded_links %>%
  count(orig_url) %>%
  arrange(desc(n))
# Keep only links that resolve to twitter.com (literal match; as a regex the
# "." would match any character).
expanded_links <- expanded_links %>%
  filter(str_detect(expanded_url, fixed("twitter.com")))

# Take the numeric status ID that follows "/status/" (or "/statuses/").
# Picking the sixth "/"-separated piece only works for URLs shaped exactly like
# https://twitter.com/<user>/status/<id>; it returns "status" for
# https://twitter.com/i/web/status/<id> and keeps any trailing "?query".
statuses <- str_match(
  expanded_links$expanded_url,
  "/status(?:es)?/([0-9]+)"
)[, 2]
# Links that are not status URLs have no ID; drop them.
statuses <- statuses[!is.na(statuses)]

# Fetch the tweet data for those statuses.
gss <- tidytags::pull_tweet_data(statuses)

# rtweet::flatten() is named explicitly: with the tidyverse attached, a bare
# flatten() resolves to purrr::flatten(), which does not return a data frame.
# NOTE(review): assumes an rtweet version that still provides flatten() -- confirm.
write_csv(rtweet::flatten(gss), "googlesheets-tweets.csv")