library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.3.2
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(ergm)
## Loading required package: statnet.common
## Loading required package: network
## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
##
## ergm: version 3.6.0, created on 2016-03-24
## Copyright (c) 2016, Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Carter T. Butts, University of California -- Irvine
## Steven M. Goodreau, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Martina Morris, University of Washington
## with contributions from
## Li Wang
## Kirk Li, University of Washington
## Skye Bender-deMoll, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm").
## NOTE: If you use custom ERGM terms based on 'ergm.userterms'
## version prior to 3.1, you will need to perform a one-time update
## of the package boilerplate files (the files that you did not write
## or modify) from 'ergm.userterms' 3.1 or later. See
## help('eut-upgrade') for instructions.
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:network':
##
## %c%, %s%, add.edges, add.vertices, delete.edges,
## delete.vertices, get.edge.attribute, get.edges,
## get.vertex.attribute, is.bipartite, is.directed,
## list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
## The following objects are masked from 'package:dplyr':
##
## %>%, as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## %>%, compose, simplify
## The following objects are masked from 'package:tidyr':
##
## %>%, crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(GGally)
## Warning: package 'GGally' was built under R version 3.3.2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
library(ggplot2)
library(network)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
data <- read_csv("~/dropbox/1_research/MSU Urban STEM SNA/like_data/all_data_likes_proc.csv")
## Parsed with column specification:
## cols(
## status_url = col_character(),
## date = col_datetime(format = ""),
## username = col_character(),
## text = col_character(),
## tweet_type = col_character(),
## numReplies = col_integer(),
## numFavs = col_integer(),
## favNames = col_character(),
## numRTs = col_integer(),
## rtNames = col_character(),
## replyNames = col_character(),
## mentionNames = col_character()
## )
print(data)
## # A tibble: 2,574 × 12
## status_url
## <chr>
## 1 https://twitter.com/_conorgalvin/status/618791263001812993
## 2 https://twitter.com/_valeriei/status/545783502919045120
## 3 https://twitter.com/1ofakindwalker/status/518229695662276608
## 4 https://twitter.com/1ofakindwalker/status/542882363437776896
## 5 https://twitter.com/1ofakindwalker/status/543177447458746368
## 6 https://twitter.com/1ofakindwalker/status/550854343352918016
## 7 https://twitter.com/2dinesh/status/506965037446352896
## 8 https://twitter.com/adafruit/status/530356448463970306
## 9 https://twitter.com/ahyo_yami/status/553571584774909952
## 10 https://twitter.com/ajsalts/status/501828452983377920
## # ... with 2,564 more rows, and 11 more variables: date <dttm>,
## # username <chr>, text <chr>, tweet_type <chr>, numReplies <int>,
## # numFavs <int>, favNames <chr>, numRTs <int>, rtNames <chr>,
## # replyNames <chr>, mentionNames <chr>
df <- data[data$tweet_type == "original" | data$tweet_type == "reply", ]
sender <- df$username
receiver <- ifelse(stringr::str_split(df$mentionNames, "\\*") != "", stringr::str_split(df$mentionNames, "\\*"), NA)
for (i in 1:length(receiver)){
if (length(receiver[[i]]) > 1 & df$tweet_type[i] == "reply"){
receiver[[i]][1] <- NA
}
}
men = stack(setNames(receiver, sender))[, 2:1]
names(men) <- c("sender", "receiver")
men$sender <- as.character(men$sender)
men <- men[!is.na(men$receiver), ]
men <- men %>% dplyr::mutate(var = "mention")
# replies
df <- data[data$tweet_type == "original" | data$tweet_type == "reply", ]
receiver <- df$username
sender <- ifelse(stringr::str_split(df$replyNames, "\\*") != "", stringr::str_split(df$replyNames, "\\*"), NA)
for (i in 1:length(sender)){
sender[[i]] <- unique(sender[[i]])
}
rep = stack(setNames(sender, receiver))[, 2:1]
names(rep) <- c("receiver", "sender")
rep <- rep[!is.na(rep$sender), ]
rep$sender <- as.character(rep$sender)
rep <- rep %>% dplyr::mutate(var = "reply")
rep$receiver <- as.character(rep$receiver)
str(rep)
## 'data.frame': 515 obs. of 3 variables:
## $ receiver: chr "akesha" "akesha" "akesha" "akesha" ...
## $ sender : chr "akesha" "piktochart" "la_stem" "akesha" ...
## $ var : chr "reply" "reply" "reply" "reply" ...
# favorites
df <- data[data$tweet_type == "original" | data$tweet_type == "reply", ]
receiver <- df$username
sender <- ifelse(stringr::str_split(df$favNames, "\\*") != "", stringr::str_split(df$favNames, "\\*"), NA)
fav = stack(setNames(sender, receiver))[, 2:1]
fav <- fav[complete.cases(fav), ]
names(fav) <- c("receiver", "sender")
fav$sender <- as.character(fav$sender)
fav <- fav[!is.na(fav$receiver), ]
fav <- fav %>% dplyr::mutate(var = "favorite")
fav <- dplyr::select(fav, sender, receiver, var)
fav <- fav[fav$sender != "", ]
fav$receiver <- as.character(fav$receiver)
# retweets
df <- data[data$tweet_type == "original" | data$tweet_type == "reply", ]
receiver <- df$username
sender <- ifelse(stringr::str_split(df$rtNames, "\\*") != "", stringr::str_split(df$rtNames, "\\*"), NA)
rt = stack(setNames(sender, receiver))[, 2:1]
rt <- rt[complete.cases(rt), ]
names(rt) <- c("receiver", "sender")
rt$sender <- as.character(rt$sender)
rt <- rt[!is.na(rt$receiver), ]
rt <- rt %>% dplyr::mutate(var = "retweet")
rt <- dplyr::select(rt, sender, receiver, var)
# all
all_df <- rbind(men, rep, fav, rt)
# dealing with membership data
membership <- read.csv("membership.csv", stringsAsFactors = F)
membership$users <- tolower(membership$users)
membership <- membership[complete.cases(membership),]
membership$membership <- car::recode(membership$membership, "c(2, 3) = 6; 4 = 7; c(1, 7, 6) = 4; 5 = 5; 10 = 1; 9 = 2; 11 = 3; 8 = 8")
membership$membership <- car::recode(membership$membership, "c(4, 5, 6, 7, 8) = 4")
membership <- dplyr::rename(membership, username = users)
membership$username <- tolower(membership$username)
membership <- rename(membership, username_membership = membership)
all_df$sender <- tolower(all_df$sender)
all_df <- dplyr::rename(all_df, username = sender)
all_df <- dplyr::left_join(all_df, membership, by = "username")
membership <- rename(membership, receiver = username, receiver_membership = username_membership)
str(membership)
## 'data.frame': 286 obs. of 2 variables:
## $ receiver : chr "1ofakindwalker" "_conorgalvin" "2dinesh" "adafruit" ...
## $ receiver_membership: num 4 4 4 4 4 3 4 4 4 4 ...
all_df <- dplyr::left_join(all_df, membership, by = "receiver")
all_df$username_membership <- ifelse(is.na(all_df$username_membership), 4, all_df$username_membership)
all_df$receiver_membership <- ifelse(is.na(all_df$receiver_membership), 4, all_df$receiver_membership)
all_df$username_membership <- car::recode(all_df$username_membership, "1 = '1st Cohort'; 2 = '2nd Cohort'; 3 = 'Instructional Team'; 4 = 'Other'")
all_df$receiver_membership <- car::recode(all_df$receiver_membership, "1 = '1st Cohort'; 2 = '2nd Cohort'; 3 = 'Instructional Team'; 4 = 'Other'")
all_df$username <- tolower(all_df$username)
all_df$receiver <- tolower(all_df$receiver)
all_df <- rename(all_df, membership = username_membership)
membership <- rename(membership, username = receiver,
membership = receiver_membership)
make_the_graph <- function(data, how_to_subset, membership){
all_df_ss <- dplyr::filter(data, var == how_to_subset) # change to "retweet" and "favorite"
mat <- as.matrix(all_df_ss[, 1:2])
g <- graph_from_edgelist(mat, directed = T)
g <- set_edge_attr(g, "weight", value = 1)
igraph::simplify(g, edge.attr.comb = list(weight = "sum"))
g <- simplify(g, remove.multiple = T, remove.loops = T, edge.attr.comb = list(weight = "sum"))
match_members <- function(g, membership){
x <- unclass(V(g))
y <- data.frame(username = names(x), stringsAsFactors = F)
membership_df <- membership
z <- dplyr::left_join(y, membership_df)
z <- dplyr::select(z, username, membership)
z$membership <- ifelse(!is.na(z$membership), z$membership, "Other")
#z$membership <- ifelse(z$membership == , "Other - Did Not Tweet", z$membership)
g <- set_vertex_attr(g, "group", value = as.character(z$membership))
return(g)
}
g <- match_members(g, membership)
g <- set_vertex_attr(g, "degree", value = igraph::degree(g, mode = "all"))
x <- data.frame(names = names(igraph::degree(g, mode = "in")),
deg = igraph::degree(g, mode = "all"))
p <- ggnet2(g,
size = "indegree",
color = "group",
palette = "Set1",
directed = T,
arrow.size = 3,
arrow.gap = .025) +
# mode = "fruchtermanreingold", layout.par = list(repulse.rad=1)) +
theme(legend.title=element_blank()) +
ggtitle(Hmisc::capitalize(how_to_subset)) +
guides(size = F)
out <- list(g, p)
return(out)
}
g_mentions <- make_the_graph(all_df, "mention", membership)
## Joining, by = "username"
## Loading required package: sna
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
##
## Attaching package: 'sna'
## The following objects are masked from 'package:igraph':
##
## betweenness, bonpow, closeness, components, degree,
## dyad.census, evcent, hierarchy, is.connected, neighborhood,
## triad.census
## Loading required package: scales
## Warning: package 'scales' was built under R version 3.3.2
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following objects are masked from 'package:readr':
##
## col_factor, col_numeric
g_replies <- make_the_graph(all_df, "reply", membership)
## Joining, by = "username"
g_retweets <- make_the_graph(all_df, "retweet", membership)
## Joining, by = "username"
g_favorites <- make_the_graph(all_df, "favorite", membership)
## Joining, by = "username"
legend_b <- get_legend(g_mentions[[2]] +
theme(legend.position="bottom"))
prow <-
plot_grid(g_mentions[[2]] + theme(legend.position="none"),
g_replies[[2]] + theme(legend.position="none"),
g_retweets[[2]] + theme(legend.position="none"),
g_favorites[[2]] + theme(legend.position="none"),
nrow = 2)
title <- ggdraw() + draw_label("#MSUrbanSTEM Interactions", fontface='bold', size = 20)
p <- plot_grid(title, prow, legend_b, nrow = 3, rel_heights = c(.05, 1, .075))
# ggsave("plot.png", width = 12, height = 12)
This is for favorites.
df_men <- intergraph::asNetwork(g_men[[1]])
df_fav <- intergraph::asNetwork(g_fav[[1]])
df_rt <- intergraph::asNetwork(g_rt[[1]])
df_rep <- intergraph::asNetwork(g_rep[[1]])
model_men <- ergm(df_men ~ edges +
mutual +
nodefactor("group", base = 4) +
nodemix("group", base = c(4, 8, 12, 13, 14, 15, 16)) +
nodecov("degree"))
model_fav <- ergm(df_fav ~ edges +
mutual +
nodefactor("group", base = 4) +
nodemix("group", base = c(1, 4, 6, 8, 11, 12, 13, 14, 15, 16)) +
nodecov("degree") +
nodematch("group", diff = F))
summary(model_fav)
model_rt <- ergm(df_rt ~ edges +
mutual +
nodefactor("group", base = 4) +
nodemix("group")) +
nodecov("degree"))
g_rt[[2]]
summary(model_rt)
model_rep <- ergm(df_rep ~ edges +
mutual +
nodefactor("group", base = 4) +
nodemix("group", base = c(4, 8, 12, 13, 14, 15, 16)) +
nodecov("degree"))
g_rep[[2]]
summary(model_rep)
# scratch
the_var = c("favorite", "mention", "reply", "retweet")
tbl_df(all_df)
## # A tibble: 4,348 × 5
## username receiver var membership receiver_membership
## <chr> <chr> <chr> <chr> <chr>
## 1 akesha carcharhinid mention Instructional Team 1st Cohort
## 2 akesha bob_ferrer mention Instructional Team 1st Cohort
## 3 akesha mra108 mention Instructional Team Other
## 4 akesha mtspec mention Instructional Team 1st Cohort
## 5 akesha sauganashteach mention Instructional Team 1st Cohort
## 6 akesha sauganashteach mention Instructional Team 1st Cohort
## 7 akesha shackkyle mention Instructional Team Other
## 8 akesha piktochart mention Instructional Team Other
## 9 akesha piktochart mention Instructional Team Other
## 10 akesha canmarcotte mention Instructional Team Instructional Team
## # ... with 4,338 more rows
all_df <- rename(all_df, username_membership = membership)
all_df_fav <- filter(all_df, var == the_var[1])
all_df_men <- filter(all_df, var == the_var[2])
all_df_rep <- filter(all_df, var == the_var[3])
all_df_rt <- filter(all_df, var == the_var[4])
tab_favorite <- table(all_df_fav$username_membership, all_df_fav$receiver_membership)
tab_mention <- table(all_df_men$username_membership, all_df_men$receiver_membership)
tab_reply <- table(all_df_rep$username_membership, all_df_rep$receiver_membership)
tab_retweet <- table(all_df_rt$username_membership, all_df_rt$receiver_membership)
arr <- abind::abind(list(tab_favorite, tab_mention, tab_reply, tab_retweet), along = 3)
names(dimnames(arr)) <- c("sender", "receiver", "interaction")
m.sat <- MASS::loglm( ~ sender + receiver + interaction, arr)
m.sat_out <- as.data.frame(resid(m.sat))
## Re-fitting to get frequencies and fitted values
arr <- as.data.frame(arr)
df_fav <- data.frame(sender = rep(row.names(arr[, 1:4]), 4),
gather(arr[, 1:4], receiver, n),
freq = select(gather(m.sat_out[, 1:4], key, freq), freq))
df_fav$receiver <- stringr::str_sub(df_fav$receiver, end = -3)
df_fav$sig <- ifelse(df_fav$freq >= 1.96, "+",
ifelse(df_fav$freq <= -1.96, "-", NA))
df_men <- data.frame(sender = rep(row.names(arr[, 5:8]), 4),
gather(arr[, 5:8], receiver, n),
select(gather(m.sat_out[, 5:8], key, freq), freq))
df_men$receiver <- stringr::str_sub(df_men$receiver, end = -3)
df_men$sig <- ifelse(df_men$freq >= 1.96, "+",
ifelse(df_men$freq <= -1.96, "-", NA))
df_rep <- data.frame(sender = rep(row.names(arr[, 9:12]), 4),
gather(arr[, 9:12], receiver, n),
select(gather(m.sat_out[, 9:12], key, freq), freq))
df_rep$receiver <- stringr::str_sub(df_rep$receiver, end = -3)
df_rep$sig <- ifelse(df_rep$freq >= 1.96, "+",
ifelse(df_rep$freq <= -1.96, "-", NA))
df_rt <- data.frame(sender = rep(row.names(arr[, 13:16]), 4),
gather(arr[, 13:16], receiver, n),
select(gather(m.sat_out[, 13:16], key, freq), freq))
df_rt$receiver <- stringr::str_sub(df_rt$receiver, end = -3)
df_rt$sig <- ifelse(df_rt$freq >= 1.96, "+",
ifelse(df_rt$freq <= -1.96, "-", NA))
p_fav <-
df_fav %>%
ggplot(aes(x = sender, y = n, fill = receiver, label = sig)) +
geom_col() +
ggtitle("Favorites") +
geom_text(position = position_stack(.5)) +
ylab(NULL) +
xlab(NULL) +
# ylim(0, 1200) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
p_men <-
df_men %>%
ggplot(aes(x = sender, y = n, fill = receiver, label = sig)) +
geom_col() +
ggtitle("Mentions") +
geom_text(position = position_stack(.5)) +
ylab(NULL) +
xlab(NULL) +
# ylim(0, 1200) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
p_rep <-
df_rep %>%
ggplot(aes(x = sender, y = n, fill = receiver, label = sig)) +
geom_col() +
ggtitle("Replies") +
geom_text(position = position_stack(.5)) +
ylab(NULL) +
xlab(NULL) +
# ylim(0, 1200) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
p_rt <-
df_rt %>%
ggplot(aes(x = sender, y = n, fill = receiver, label = sig)) +
geom_col() +
ggtitle("Retweets") +
geom_text(position = position_stack(.5)) +
ylab(NULL) +
xlab(NULL) +
# ylim(0, 1200) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# instructional team is not likely to interact with instructional team
# 1st and 2nd cohort are not likely to interact
# nobody is likely to mention other, and other are less likely to mention other other (weird wording)
legend <- get_legend(p_fav + theme(legend.position = "bottom"))
## Warning: Removed 7 rows containing missing values (geom_text).
pnew <-
plot_grid(p_men + theme(legend.position="none"),
p_rep + theme(legend.position="none"),
p_rt + theme(legend.position="none"),
p_fav + theme(legend.position="none"),
nrow = 2)
## Warning: Removed 4 rows containing missing values (geom_text).
## Warning: Removed 6 rows containing missing values (geom_text).
## Warning: Removed 6 rows containing missing values (geom_text).
## Warning: Removed 7 rows containing missing values (geom_text).
pnew
title <- ggdraw() + draw_label("#MSUrbanSTEM Interactions", fontface = 'bold', size = 20)
p <- plot_grid(title, prow, legend, nrow = 3, rel_heights = c(.05, 1, .075))
p + labs(caption = "Note. Significance tests are evaluated using log-linear models. The y-axis scales differ between interactions.")