0. Loading and setup
# Clear the workspace, then attach the packages used by the rest of the script.
rm(list = ls())
library(tidyverse)
library(stringr)
library(lubridate)
library(ggthemes)
library(cowplot)
library(igraph)
library(GGally)

# Cohort 2 tweets, already processed with the Twitter Data Suite.
df_c2 <- read_csv("~/Google Drive/1_Research/Twitter_Data_Suite/hashtag/urbanstem_cohort2/2_tweets.csv")

# Raw tweets collected with Agarwal's Twitter tracker; keep only the rows
# whose hashtag is #msurbanstem (case-insensitive).
df_t1 <- read_csv("/Users/joshuarosenberg/Google Drive/6_Data/MAET/maet_data_1-2014-7-2015.csv") %>%
  filter(tolower(hashtag) == "#msurbanstem")

# Raw tweets collected with TAGS: parse `created_at` into a `date` column,
# then keep the rows whose entities mention "msurbanstem" (case-insensitive).
df_t2 <- read_csv("/Users/joshuarosenberg/Google Drive/6_Data/MAET/maet_data_7_2015-12-2016.csv") %>%
  mutate(date = lubridate::parse_date_time(created_at, "%a %b! %d! %H! %M! %S! %z! %y!")) %>%
  filter(stringr::str_detect(tolower(entities_str), "msurbanstem"))

# User-to-group lookup that is joined onto the edgelist later on.
membership <- read_csv("~/Dropbox/1_Research/MSU Urban STEM SNA/membership.csv")

# Both raw collections stacked into a single table.
df_all <- bind_rows(df_t1, df_t2)
1. Time Series
# Count tweets per calendar day. floor_date() keeps every tweet on the day it
# was posted; round_date() would push tweets posted after noon onto the
# following day and distort the daily counts.
df_all$date_day <- floor_date(df_all$date, "day")
to_plot <- count(df_all, date_day)

# x-axis range of the plot.
the_limits <- as.POSIXct(c(ymd("2014-06-01"), ymd("2017-07-30")))

# Start, end, and label position (on the x-axis) of each cohort band.
first_cohort_begins <- as.POSIXct(ymd("2014-07-15"))
first_cohort_ends <- as.POSIXct(ymd("2015-06-30"))
first_cohort_annotation <- as.POSIXct(ymd("2014-12-25"))
second_cohort_begins <- as.POSIXct(ymd("2015-07-15"))
second_cohort_ends <- as.POSIXct(ymd("2016-06-30"))
second_cohort_annotation <- as.POSIXct(ymd("2015-12-25"))
third_cohort_begins <- as.POSIXct(ymd("2016-07-15"))
third_cohort_ends <- as.POSIXct(ymd("2017-06-30"))
third_cohort_annotation <- as.POSIXct(ymd("2016-12-25"))

# The cohort bands are drawn with annotate("rect"), which adds exactly one
# rectangle per band. geom_rect() with constant aesthetics draws one rectangle
# per row of `to_plot`, so the resulting opacity depends on the number of days
# in the data. The bands are added before the line so the line stays on top.
timeseries_plot <- ggplot(to_plot, aes(x = date_day, y = n)) +
  annotate("rect",
           xmin = first_cohort_begins, xmax = first_cohort_ends,
           ymin = -Inf, ymax = Inf,
           fill = "#d8b365", alpha = 0.4) +
  annotate("rect",
           xmin = second_cohort_begins, xmax = second_cohort_ends,
           ymin = -Inf, ymax = Inf,
           fill = "#f5f5f5", alpha = 0.4) +
  annotate("rect",
           xmin = third_cohort_begins, xmax = third_cohort_ends,
           ymin = -Inf, ymax = Inf,
           fill = "#5ab4ac", alpha = 0.4) +
  geom_line() +
  scale_x_datetime(limits = the_limits) +
  annotate("text", x = first_cohort_annotation, y = 175, label = "1st Cohort") +
  annotate("text", x = second_cohort_annotation, y = 175, label = "2nd Cohort") +
  annotate("text", x = third_cohort_annotation, y = 175, label = "3rd Cohort") +
  theme_minimal() +
  ylab("Number of Tweets Per Day") +
  xlab(NULL) +
  ggtitle(NULL) +
  theme(text = element_text(size = 16, family = "Times"))
timeseries_plot

# Pass the plot explicitly instead of relying on last_plot().
ggsave("msurbanstem_timeseries.png", plot = timeseries_plot, width = 10, height = 9)
2. Processing
# Build a long edgelist (one row per sender-receiver pair) from a tweet table.
#
# Arguments:
#   df            data frame of tweets; must contain a `type` column and the
#                 two columns named by `sender_col` and `receiver_col`.
#   sender_col    name (string) of the column holding "*"-separated user names.
#   receiver_col  name (string) of the column whose value is paired with every
#                 name found in `sender_col` on the same row.
#
# Returns a tibble with columns `receiver` and `sender` (both lower-cased) and
# `var` (always equal to `sender_col`). Only rows of type "ORIG" or "REPLY"
# are used, and pairs whose sender is NA are dropped. A table with no usable
# rows yields a zero-row tibble with the same three columns.
create_the_edgelist <- function(df, sender_col, receiver_col) {
  missing_cols <- setdiff(c("type", sender_col, receiver_col), names(df))
  if (length(missing_cols) > 0) {
    stop("`df` is missing column(s): ", paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  df_ss <- dplyr::filter(df, type %in% c("ORIG", "REPLY"))

  receiver <- tolower(df_ss[[receiver_col]])
  # One character vector of sender names per tweet.
  sender_list <- stringr::str_split(df_ss[[sender_col]], "\\*")

  # Repeat each receiver once per sender on its row, so the two columns line
  # up element by element (also well-defined when there are no rows at all).
  edges <- tibble::tibble(
    receiver = rep(receiver, lengths(sender_list)),
    sender = tolower(as.character(unlist(sender_list))),
    var = sender_col
  )

  edges[!is.na(edges$sender), ]
}
# One edgelist per interaction type, all built from the cohort 2 tweets and
# then stacked into a single table.
favorites <- create_the_edgelist(df_c2, "favNames", "screen_name")
mentions <- create_the_edgelist(df_c2, "non_reply_mentions", "screen_name")
retweets <- create_the_edgelist(df_c2, "rtNames", "screen_name")
replies <- create_the_edgelist(df_c2, "reply_user_sn", "screen_name")
all_df <- bind_rows(favorites, mentions, retweets, replies)

# Replace the source column names stored in `var` with display labels;
# any other value becomes NA.
all_df <- all_df %>%
  mutate(var = case_when(
    var == "favNames" ~ "Favorites",
    var == "non_reply_mentions" ~ "Mentions",
    var == "rtNames" ~ "Retweets",
    var == "reply_user_sn" ~ "Replies"
  ))

# Joining membership data
# The three recodes run in sequence: remap the raw codes, collapse codes 4-8
# into 4, then turn the remaining codes 1-4 into group labels.
membership$membership <- car::recode(membership$membership, "c(2, 3) = 6; 4 = 7; c(1, 7, 6) = 4; 5 = 5; 10 = 1; 9 = 2; 11 = 3; 8 = 8")
membership$membership <- car::recode(membership$membership, "c(4, 5, 6, 7, 8) = 4")
membership$membership <- car::recode(membership$membership, "1 = '1st Cohort'; 2 = '2nd Cohort'; 3 = 'Instructional Team'; 4 = 'Other'")

# Duplicate the user and group columns under sender_* and receiver_* names so
# the same lookup table can be joined on either end of an edge.
membership <- membership %>%
  rename(receiver_membership = membership, sender = users) %>%
  mutate(sender_membership = receiver_membership, receiver = sender)

# Attach the group of both endpoints; users without a match are "Other".
all_df_ss <- all_df %>%
  left_join(select(membership, sender, sender_membership), by = "sender") %>%
  left_join(select(membership, receiver, receiver_membership), by = "receiver") %>%
  select(sender, receiver, sender_membership, receiver_membership, var) %>%
  mutate(
    sender_membership = replace(sender_membership, is.na(sender_membership), "Other"),
    receiver_membership = replace(receiver_membership, is.na(receiver_membership), "Other")
  )

# filtering others: keep only edges whose sender also appears as a receiver
all_df_filtered <- all_df_ss %>%
  filter(sender %in% receiver)

# adding 1st and 2nd cohort and instructors with no ties
# all_df_filtered %>%
# filter(receiver_membership == "1st Cohort" |
# receiver_membership == "2nd Cohort" |
# receiver_membership == "Instructional Team") %>%
# group_by(var, sender) %>%
# summarize(n = n()) %>% View()
3. Degree plots
# Stacked histogram of in- or out-degree for one interaction type, coloured by
# membership group.
#
# Arguments:
#   edgelist      edge table whose first two columns are the edge endpoints
#                 and which has `var`, `sender_membership` and
#                 `receiver_membership` columns.
#   selected_var  value of `var` to keep (also used as the plot title).
#   mode          "in" groups edges by `receiver_membership`; any other value
#                 groups them by `sender_membership`. The value is also passed
#                 to igraph::degree() and used in the x-axis label.
#
# Returns a ggplot object. Degrees above 25 are shown in the 25 bin.
#
# NOTE(review): each per-group graph contains both endpoints of every retained
# edge, so users from outside the group who appear only on the other end of an
# edge are counted in that group's histogram (with degree 0 unless they also
# appear on the filtered side) -- confirm this is intended.
create_density_plots <- function(edgelist, selected_var, mode) {
  edgelist_ss <- filter(edgelist, var == selected_var)

  membership_col <- if (mode == "in") "receiver_membership" else "sender_membership"
  group_labels <- c("1st Cohort", "2nd Cohort", "Instructional Team", "Other")

  # One degree table per group, computed on the sub-network of that group's edges.
  degree_by_group <- lapply(group_labels, function(group_label) {
    keep <- which(edgelist_ss[[membership_col]] == group_label)
    group_graph <- igraph::graph_from_data_frame(edgelist_ss[keep, 1:2])
    tibble(
      degree = unname(igraph::degree(group_graph, mode = mode)),
      group = group_label
    )
  })

  df <- bind_rows(degree_by_group)
  # Cap the degree so a few very active users do not stretch the x-axis.
  df$degree <- pmin(df$degree, 25)

  ggplot(df, aes(x = degree, fill = group)) +
    geom_histogram(binwidth = 1) +
    ylab("Number of Participants") +
    xlab(paste0(Hmisc::capitalize(mode), "-degree")) +
    ggtitle(selected_var) +
    theme(legend.title = element_text(size = 16),
          text = element_text(family = "Times")) +
    scale_fill_discrete("") +
    xlim(-.5, 25.5)
}
# Arrange one degree histogram per interaction type in a two-row grid with a
# single shared legend above it.
#
# Arguments:
#   df                  edge table passed on to create_density_plots().
#   interaction_vector  values of `var` to plot, one panel each.
#   mode                degree mode passed on to create_density_plots().
#
# Returns the combined cowplot grid.
create_plot_grid <- function(df, interaction_vector, mode) {
  stopifnot(length(interaction_vector) > 0)

  # Use the `df` argument rather than the global `all_df_filtered`, so the
  # function plots whatever table the caller supplies.
  panels <- lapply(interaction_vector, function(interaction_name) {
    create_density_plots(df, interaction_name, mode)
  })

  # All panels share the same fill scale, so the first panel's legend serves
  # the whole grid.
  legend_b <- get_legend(panels[[1]] + theme(legend.position = "bottom"))
  panels <- lapply(panels, function(panel) panel + theme(legend.position = "none"))
  prow <- plot_grid(plotlist = panels, nrow = 2)

  # The leading NULL is an empty spacer row where a title could go.
  plot_grid(NULL, legend_b, prow, nrow = 3, rel_heights = c(.01, .075, 1))
}
# In-degree histograms for the four interaction types.
in_plot <- create_plot_grid(all_df_filtered,
                            c("Mentions",
                              "Replies",
                              "Favorites",
                              "Retweets"),
                            "in")
in_plot

# Pass the plot explicitly so the saved file does not depend on whichever
# plot happens to be the last one created.
ggsave("indegree_plot.png", plot = in_plot, width = 6, height = 6)

# Out-degree histograms for the same interaction types.
out_plot <- create_plot_grid(all_df_filtered,
                             c("Mentions",
                               "Replies",
                               "Favorites",
                               "Retweets"),
                             "out")
out_plot

ggsave("outdegree_plot.png", plot = out_plot, width = 6, height = 6)
# ggsave("p.png", width = 10, height = 10)
4. Sociograms
# Draw the sociogram for one interaction type.
#
# Arguments:
#   df           edge table whose first two columns are the edge endpoints and
#                which has `sender`, `receiver`, `sender_membership`,
#                `receiver_membership` and `var` columns.
#   interaction  value of `var` to keep (also used, capitalized, as the title).
#
# Returns the ggplot object produced by ggnet2(): nodes coloured by
# membership and sized by total degree, edges sized by the number of
# interactions between the pair.
make_the_graph <- function(df, interaction) {
  # Keep the requested interaction type and drop self-ties.
  df_ss <- filter(df, var == interaction, sender != receiver)

  # Vertex table: every user seen on either end of an edge, with their group.
  sender_df <- select(df_ss, username = sender, membership = sender_membership)
  receiver_df <- select(df_ss, username = receiver, membership = receiver_membership)
  vertex_df_ss <- distinct(bind_rows(sender_df, receiver_df))

  g <- igraph::graph_from_data_frame(df_ss[, 1:2], vertices = vertex_df_ss, directed = TRUE)

  # Give every edge weight 1, then merge parallel edges by summing, so the
  # weight becomes the number of interactions between a pair.
  # simplify() is namespaced because purrr also exports a function of that name.
  g <- igraph::set_edge_attr(g, "weight", value = 1)
  g <- igraph::simplify(g, remove.multiple = TRUE, remove.loops = TRUE,
                        edge.attr.comb = list(weight = "sum"))

  # Node size: total degree, scaled down for plotting.
  g <- igraph::set_vertex_attr(g, "degree", value = igraph::degree(g, mode = "all"))
  g <- igraph::set_vertex_attr(g, "degree_s", value = igraph::vertex_attr(g, "degree") / 3)

  # Edge width: weight capped at 25, scaled down for plotting. The previous
  # ifelse(weight > 25, 15, weight) drew edges heavier than 25 thinner than
  # edges weighing 16-25.
  # NOTE(review): the cap uses 25 to match the threshold that was tested;
  # confirm a cap of 15 was not the intent.
  edge_weight <- igraph::edge_attr(g, "weight")
  g <- igraph::set_edge_attr(g, "weight_s", value = pmin(edge_weight, 25) / 5)

  ggnet2(g,
         color = "membership",
         size = "degree_s",
         palette = "Set1",
         directed = TRUE,
         arrow.size = 3,
         edge.size = "weight_s",
         label = FALSE,
         arrow.gap = .05) +
    theme(legend.title = element_blank(),
          text = element_text(size = 25, family = "Times")) +
    guides(size = "none") +
    ggtitle(Hmisc::capitalize(interaction))
}
# Arrange one sociogram per interaction type in a two-row grid with a single
# shared legend above it.
#
# Arguments:
#   df                  edge table passed on to make_the_graph().
#   interaction_vector  values of `var` to plot, one panel each.
#   mode                unused; kept (now with a default) so existing calls
#                       with or without it keep working.
#
# Returns the combined cowplot grid.
create_soc_grid <- function(df, interaction_vector, mode = NULL) {
  stopifnot(length(interaction_vector) > 0)

  panels <- lapply(interaction_vector, function(interaction_name) {
    make_the_graph(df, interaction_name)
  })

  # The legend is taken from the first panel and shown once for the grid.
  legend_b <- get_legend(panels[[1]] + theme(legend.position = "bottom"))
  panels <- lapply(panels, function(panel) panel + theme(legend.position = "none"))
  prow <- plot_grid(plotlist = panels, nrow = 2)

  # The leading NULL is an empty spacer row where a title could go.
  plot_grid(NULL, legend_b, prow, nrow = 3, rel_heights = c(.01, .075, 1))
}
# Sociograms for the four interaction types.
sociogram_plot <- create_soc_grid(all_df_filtered,
                                  c("Mentions",
                                    "Replies",
                                    "Favorites",
                                    "Retweets"))
sociogram_plot

# Pass the plot explicitly so the saved file does not depend on whichever
# plot happens to be the last one created.
ggsave("sociogram.png", plot = sociogram_plot, width = 7, height = 7)