Todo:
library(tidyverse)
library(igraph)
library(lme4)
library(sjstats)
library(rtweet)
library(rTAGS) # install with devtools::install_github('bretsw/rTAGS')
library(brms)
# source('prepare-data.R') # this creates the two files with 'to_analyze' in their name, as read below
For notes on this one-time setup, see this walkthrough: http://centerforopenscience.github.io/osfr/articles/auth.html
First, you must generate an API token from an OSF account that has been added to the data repository. Read how to do this here: https://developer.osf.io/#tag/General-Usage
Then, you need to add the OSF API token to the .Renviron
file, which is opened (and created if it does not yet exist) using the following command. Here, the file is edited at the user level, although this could also be set to the project level.
usethis::edit_r_environ(scope='user')
Open the .Renviron
file and add a single line, using this exact text but replacing <token>
with your OSF API token:
OSF_PAT="<token>"
Save the file, quit R, and restart in a new session. Continue running the R script from here.
Now, install the osfr
package and load the library:
library(osfr)
Upon loading the osfr
package, you should see this message:
Automatically registered OSF personal access token.
Now you are able to retrieve and download the relevant datasets with this code:
# Fetch each dataset from OSF. The label above each call is the original
# author's name for the remote file; `path` is the local name that the
# read_*() calls further down use.

# all-ngsschat-tweets.csv (saved locally under an .rds name)
osf_download(
  osf_retrieve_file("https://osf.io/k2w6t/"),
  path = "ngsschat-tweets-14-15.rds",
  overwrite = TRUE
)

# orig-pre.csv
osf_download(
  osf_retrieve_file("https://osf.io/nj8yk/"),
  path = "orig-pre-14.csv",
  overwrite = TRUE
)

# orig-post.csv
osf_download(
  osf_retrieve_file("https://osf.io/ngwpt/"),
  path = "orig-post-15.csv",
  overwrite = TRUE
)

# users-to-analyze.csv
osf_download(
  osf_retrieve_file("https://osf.io/jz7p6/"),
  path = "users-to-analyze.csv",
  overwrite = TRUE
)

# edgelist-to-analyze.csv
osf_download(
  osf_retrieve_file("https://osf.io/sbyn9/"),
  path = "edgelist-to-analyze.csv",
  overwrite = TRUE
)
# Load the files downloaded above into the objects used by the rest of the
# analysis.
orig <- read_rds("ngsschat-tweets-14-15.rds") # original data
orig_pre <- read_csv("orig-pre-14.csv") # data for the year before
orig_post <- read_csv("orig-post-15.csv") # data for the year after
users <- read_csv("users-to-analyze.csv") # processed user data
edge <- read_csv("edgelist-to-analyze.csv") # processed edgelist data
# Tweet volume over time, with the study window (2014-08-01 to 2015-07-31)
# marked by two vertical lines and a shaded band.
ts_plot(orig) +
  # annotate() draws the band exactly once. A geom_rect() whose aes() holds
  # only constants is drawn once per row of the plot data, so its apparent
  # opacity depends on the number of rows rather than on `alpha`.
  annotate(
    "rect",
    xmin = as.POSIXct(as.Date("2014-08-01")),
    xmax = as.POSIXct(as.Date("2015-07-31")),
    ymin = -Inf,
    ymax = Inf,
    fill = "cyan3",
    alpha = 0.2
  ) +
  geom_vline(xintercept = as.POSIXct(as.Date("2014-08-01"))) +
  geom_vline(xintercept = as.POSIXct(as.Date("2015-07-31"))) +
  theme_bw() +
  xlab("Day") +
  ylab("Number of Tweets including #NGSSchat per day") +
  # restrict the x axis to 2012-2017; points outside this range are dropped
  xlim(c(as.POSIXct(as.Date("2012-01-01")), as.POSIXct(as.Date("2017-12-31"))))
# loc <- osm_geocode(users$location, key = MQ_API_KEY)
# l <- as.list(users$location) %>%
# map(osm_geocode, key = MQ_API_KEY)
#
# write_rds(l, "geocoded-locations.rds")
# Cached geocoding results, one list element per row of `users`
# (presumably in the same order as users$location; see the commented-out
# code above that produced the cache).
l <- read_rds("geocoded-locations.rds")

# Elements without the requested field yield NULL; swap those for NA so that
# unlist() does not silently drop them.
null_to_na <- function(values) {
  modify_if(values, is.null, function(v) NA)
}

lats <- null_to_na(purrr::map(l, function(res) res$lat))
lons <- null_to_na(purrr::map(l, function(res) res$lon))
display_name <- null_to_na(purrr::map(l, function(res) res$display_name))

# Attach the flattened fields to the user table.
users[["lat"]] <- unlist(lats)
users[["lon"]] <- unlist(lons)
users[["display_name"]] <- unlist(display_name)
# Map of users whose geocoded display name contains "United States",
# coloured by group and sized by number of tweets, over US state outlines.
states <- map_data("state")
ggplot(data = states) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    color = "black", fill = "white"
  ) +
  geom_point(
    data = filter(users, str_detect(display_name, "United States")),
    aes(x = lon, y = lat, color = group, size = n_tweets)
  ) +
  # Only one coordinate system can be active: the earlier coord_fixed(1.3)
  # was immediately replaced by coord_map(), so it is dropped. The former
  # guides(fill = FALSE) is dropped too, because no layer maps `fill`.
  coord_map("stereographic") +
  ggthemes::theme_map() +
  scale_color_viridis_d("Group") +
  # legend title: closing parenthesis restored
  scale_size_continuous("Number of Original Tweets ('14-'15)")
# Drop account_lang and user_id from the user table (original note: "this var
# seems to be all NA").
users <- users %>%
  select(-account_lang, -user_id)

# overall: number of original (non-retweet) tweets in `orig` per user, for
# users present in `users` with n_tweets >= 2 (the rendered output lists 191)
orig %>%
  filter(!is_retweet) %>%
  mutate(screen_name = tolower(screen_name)) %>%
  left_join(users) %>%
  semi_join(users) %>%
  filter(n_tweets >= 2) %>%
  count(screen_name) %>%
  rename(n_tweets = n)
## # A tibble: 191 x 2
## screen_name n_tweets
## <chr> <int>
## 1 21stscied 13
## 2 2footgiraffe 58
## 3 achavez_science 3
## 4 adchempages 28
## 5 aeolani 8
## 6 ajollygal 2
## 7 aliciajohal 5
## 8 all4ed 2
## 9 alynnmeyer 34
## 10 amycoyote 9
## # … with 181 more rows
# Median, mean and SD of the per-user original-tweet counts, using the same
# user filter as the listing above.
orig %>%
  filter(!is_retweet) %>%
  mutate(screen_name = tolower(screen_name)) %>%
  left_join(users) %>%
  semi_join(users) %>%
  filter(n_tweets >= 2) %>%
  count(screen_name) %>%
  summarise(
    median_n = median(n),
    mean_n = mean(n),
    sd_n = sd(n)
  )
## # A tibble: 1 x 3
## median_n mean_n sd_n
## <int> <dbl> <dbl>
## 1 12 33.3 90.7
# by group: original tweets from users with n_tweets >= 2 and a known group.
# Screen names are lower-cased once, before the joins.
du <- orig %>%
  filter(!is_retweet) %>%
  mutate(screen_name = tolower(screen_name)) %>%
  left_join(users) %>%
  semi_join(users) %>%
  filter(n_tweets >= 2) %>%
  filter(!is.na(group))
# Number of distinct tweeters per group, with each group's share of tweeters.
n_tweeters_by_group <- du %>%
  filter(!is_retweet) %>%
  distinct(group, screen_name) %>%
  count(group) %>%
  rename(n_tweeters = n) %>%
  mutate(n_prop = n_tweeters / sum(n_tweeters)) %>%
  arrange(desc(n_tweeters))

# Total number of original tweets per group.
n_tweets_by_group <- du %>%
  filter(!is_retweet) %>%
  count(group) %>%
  rename(sum_n_tweets = n)

# this is individual engagement by group - probably what we want to report:
# mean and SD of tweets per user within each group, alongside the totals
fin_df <- du %>%
  filter(!is_retweet) %>%
  count(group, screen_name) %>%
  group_by(group) %>%
  summarise(
    mean_n_tweets = mean(n),
    sd_n_tweets = sd(n)
  ) %>%
  right_join(n_tweeters_by_group) %>%
  right_join(n_tweets_by_group) %>%
  select(group, sum_n_tweets, n_tweeters, mean_n_tweets, sd_n_tweets) %>%
  arrange(desc(sum_n_tweets))

fin_df
## # A tibble: 6 x 5
## group sum_n_tweets n_tweeters mean_n_tweets sd_n_tweets
## <chr> <int> <int> <dbl> <dbl>
## 1 Teacher 2653 78 34.0 90.9
## 2 Administrator 2421 46 52.6 135.
## 3 Researcher 600 14 42.9 54.9
## 4 Other 441 22 20.0 29.2
## 5 Unclear 184 23 8 7.53
## 6 Organization 67 8 8.38 4.50
# Goodness-of-fit test of the per-group tweet totals against equal expected
# counts (chisq.test() on a single vector uses equal probabilities by default).
# NOTE(review): tweets are clustered within users, so the independence
# assumption behind this test is questionable - interpret with care.
chisq.test(fin_df$sum_n_tweets) # there are sig diffs across sum_n_tweets
##
## Chi-squared test for given probabilities
##
## data: fin_df$sum_n_tweets
## X-squared = 6350.8, df = 5, p-value < 2.2e-16
# Re-run the same test, keeping the result so the standardized residuals can
# be inspected; they are returned in the row order of fin_df.
cst <- chisq.test(fin_df$sum_n_tweets)
cst$stdres
## [1] 53.53971 45.73744 -15.50365 -20.85089 -29.49392 -33.42869
# Follow-up test restricted to teachers and administrators. The two totals are
# looked up by group label instead of by row position, so the comparison does
# not silently change if fin_df is sorted differently.
cst <- chisq.test(
  fin_df$sum_n_tweets[match(c("Teacher", "Administrator"), fin_df$group)]
)
cst$stdres # there does seem to be a diff in n between teachers and admin
## [1] 3.256962 -3.256962
# Conversing network: build a dense sender x receiver table of interaction
# counts (one row per ordered pair, including pairs with a count of 0) and
# attach user attributes for the dyadic models that follow.
dc <- edge %>%
  filter(interaction_type == "conversing")
g <- graph_from_data_frame(dc)
m <- as_adjacency_matrix(g, sparse = FALSE) # sender is row, receiver is column
t <- m %>%
  as.data.frame() %>%
  rownames_to_column("sender") %>%
  gather(receiver, val, -sender) %>%
  as_tibble()
tt <- add_users_data(t, users)

# One lookup for collapsing the numeric role codes into groups, shared by the
# sender and receiver columns so the two mappings cannot drift apart.
# Code 11 is the placeholder assigned below to users without a code.
group_labels <- c(
  `1` = "Teacher",
  `2` = "Administrator",
  `3` = "Administrator",
  `4` = "Researcher",
  `5` = "Other",
  `6` = "Organization",
  `7` = "Organization",
  `8` = "Other",
  `9` = "Other",
  `10` = "Other",
  `11` = "Other"
)

tt <- tt %>%
  mutate(
    code_sender = ifelse(is.na(code_sender), 11, code_sender),
    code_receiver = ifelse(is.na(code_receiver), 11, code_receiver)
  ) %>%
  # filter(code_sender != 11 & code_receiver !=11) %>%
  mutate(
    group_sender = recode(code_sender, !!!group_labels),
    group_receiver = recode(code_receiver, !!!group_labels)
  )

# "Other" is the reference level in the models.
tt$group_receiver <- fct_relevel(as.factor(tt$group_receiver), "Other")
tt$group_sender <- fct_relevel(as.factor(tt$group_sender), "Other")

# 1 if the pair interacted at all, 0 otherwise
tt$dic <- ifelse(tt$val > 0, 1, 0)

# 1 if sender and receiver belong to the same group. Labels are compared as
# character because `==` on two factors raises "level sets of factors are
# different" whenever a group occurs on only one side.
tt$same <- ifelse(
  as.character(tt$group_sender) == as.character(tt$group_receiver), 1, 0
)
# Null (intercept-only) model for conversing counts, with crossed random
# intercepts for sender and receiver. It is stored as mc0 so that the next
# model, which is assigned to mc1, does not overwrite it and both fits remain
# available for comparison.
mc0 <- brm(
  formula = val ~ 1 + (1 | sender) + (1 | receiver),
  data = tt,
  family = 'poisson',
  iter = 800, chains = 3, cores = 3,
  control = list(adapt_delta = 0.99, max_treedepth = 10)
)
summary(mc0)
## Family: poisson
## Links: mu = log
## Formula: val ~ 1 + (1 | sender) + (1 | receiver)
## Data: tt (Number of observations: 117649)
## Samples: 3 chains, each with iter = 800; warmup = 400; thin = 1;
## total post-warmup samples = 1200
##
## Group-Level Effects:
## ~receiver (Number of levels: 343)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 2.42 0.15 2.15 2.71 71 1.02
##
## ~sender (Number of levels: 242)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 1.86 0.11 1.67 2.11 36 1.06
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -6.12 0.18 -6.48 -5.79 70 1.03
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Conversing counts with sender and receiver group as fixed effects and
# crossed random intercepts for sender and receiver.
# NOTE(review): the rendered summary below shows Rhat values up to 1.27 and
# effective sample sizes as low as 9 - consider more iterations before
# interpreting these estimates.
mc1 <- brm(
  formula = val ~ 1 + group_sender + group_receiver +
    (1 | sender) + (1 | receiver),
  data = tt,
  family = 'poisson',
  chains = 3, iter = 800, cores = 3,
  control = list(adapt_delta = 0.99, max_treedepth = 10)
)
summary(mc1)
## Family: poisson
## Links: mu = log
## Formula: val ~ 1 + group_sender + group_receiver + (1 | sender) + (1 | receiver)
## Data: tt (Number of observations: 117649)
## Samples: 3 chains, each with iter = 800; warmup = 400; thin = 1;
## total post-warmup samples = 1200
##
## Group-Level Effects:
## ~receiver (Number of levels: 343)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 2.37 0.14 2.09 2.61 48 1.05
##
## ~sender (Number of levels: 242)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 1.85 0.11 1.66 2.07 54 1.04
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI
## Intercept -7.16 0.38 -7.90 -6.37
## group_senderAdministrator 0.71 0.37 -0.03 1.39
## group_senderOrganization 0.03 0.54 -1.01 1.11
## group_senderResearcher 0.83 0.46 0.04 1.76
## group_senderTeacher 0.63 0.32 -0.09 1.23
## group_receiverAdministrator 0.80 0.44 -0.03 1.61
## group_receiverOrganization -0.13 0.70 -1.40 1.37
## group_receiverResearcher 1.10 0.57 -0.03 2.18
## group_receiverTeacher 0.91 0.36 0.07 1.60
## Eff.Sample Rhat
## Intercept 20 1.11
## group_senderAdministrator 56 1.07
## group_senderOrganization 105 1.01
## group_senderResearcher 88 1.06
## group_senderTeacher 15 1.15
## group_receiverAdministrator 34 1.08
## group_receiverOrganization 43 1.04
## group_receiverResearcher 57 1.08
## group_receiverTeacher 9 1.27
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Adds `same` (sender and receiver share a group) to the group model.
# NOTE(review): the rendered summary below shows Rhat values up to 1.23 -
# consider more iterations before interpreting these estimates.
mc2 <- brm(
  formula = val ~ 1 + group_sender + group_receiver + same +
    (1 | sender) + (1 | receiver),
  data = tt,
  family = 'poisson',
  chains = 3, iter = 800, cores = 3,
  control = list(adapt_delta = 0.99, max_treedepth = 10)
)
summary(mc2)
## Family: poisson
## Links: mu = log
## Formula: val ~ 1 + group_sender + group_receiver + same + (1 | sender) + (1 | receiver)
## Data: tt (Number of observations: 117649)
## Samples: 3 chains, each with iter = 800; warmup = 400; thin = 1;
## total post-warmup samples = 1200
##
## Group-Level Effects:
## ~receiver (Number of levels: 343)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 2.37 0.13 2.14 2.64 113 1.02
##
## ~sender (Number of levels: 242)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 1.87 0.11 1.68 2.14 73 1.04
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI
## Intercept -7.13 0.41 -7.90 -6.26
## group_senderAdministrator 0.75 0.40 -0.07 1.58
## group_senderOrganization -0.07 0.62 -1.39 1.16
## group_senderResearcher 0.79 0.57 -0.39 1.79
## group_senderTeacher 0.63 0.33 -0.07 1.27
## group_receiverAdministrator 0.74 0.39 -0.09 1.44
## group_receiverOrganization -0.14 0.64 -1.41 1.10
## group_receiverResearcher 1.18 0.56 0.12 2.23
## group_receiverTeacher 0.91 0.35 0.24 1.61
## same 0.09 0.02 0.05 0.14
## Eff.Sample Rhat
## Intercept 15 1.23
## group_senderAdministrator 47 1.07
## group_senderOrganization 92 1.03
## group_senderResearcher 72 1.01
## group_senderTeacher 45 1.02
## group_receiverAdministrator 37 1.14
## group_receiverOrganization 51 1.07
## group_receiverResearcher 84 1.02
## group_receiverTeacher 22 1.17
## same 1229 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Full conversing model: adds standardized account age and tweet volume for
# both sender and receiver. Run with 1800 iterations per chain.
mc3 <- brm(
  formula = val ~ 1 +
    scale(years_on_twitter_sender) + scale(n_tweets_sender) + group_sender +
    scale(years_on_twitter_receiver) + scale(n_tweets_receiver) + group_receiver +
    same +
    (1 | sender) + (1 | receiver),
  data = tt,
  family = 'poisson',
  chains = 3, iter = 1800, cores = 3,
  control = list(adapt_delta = 0.99, max_treedepth = 10)
)
summary(mc3)
## Family: poisson
## Links: mu = log
## Formula: val ~ 1 + scale(years_on_twitter_sender) + scale(n_tweets_sender) + group_sender + scale(years_on_twitter_receiver) + scale(n_tweets_receiver) + group_receiver + same + (1 | sender) + (1 | receiver)
## Data: tt (Number of observations: 117649)
## Samples: 3 chains, each with iter = 1800; warmup = 900; thin = 1;
## total post-warmup samples = 2700
##
## Group-Level Effects:
## ~receiver (Number of levels: 343)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 2.29 0.13 2.06 2.57 293 1.00
##
## ~sender (Number of levels: 242)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 1.53 0.09 1.36 1.72 208 1.03
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI
## Intercept -6.61 0.33 -7.29 -5.96
## scaleyears_on_twitter_sender 0.24 0.09 0.05 0.43
## scalen_tweets_sender 1.03 0.12 0.78 1.27
## group_senderAdministrator 0.28 0.30 -0.27 0.88
## group_senderOrganization -0.32 0.48 -1.22 0.62
## group_senderResearcher 0.35 0.41 -0.44 1.13
## group_senderTeacher 0.23 0.26 -0.24 0.74
## scaleyears_on_twitter_receiver 0.28 0.14 -0.01 0.55
## scalen_tweets_receiver 0.38 0.13 0.12 0.62
## group_receiverAdministrator 0.54 0.38 -0.20 1.25
## group_receiverOrganization -0.28 0.58 -1.36 0.92
## group_receiverResearcher 0.88 0.57 -0.19 2.03
## group_receiverTeacher 0.75 0.34 0.07 1.38
## same 0.09 0.02 0.05 0.14
## Eff.Sample Rhat
## Intercept 104 1.02
## scaleyears_on_twitter_sender 243 1.01
## scalen_tweets_sender 291 1.01
## group_senderAdministrator 186 1.00
## group_senderOrganization 270 1.01
## group_senderResearcher 308 1.02
## group_senderTeacher 188 1.01
## scaleyears_on_twitter_receiver 133 1.01
## scalen_tweets_receiver 488 1.01
## group_receiverAdministrator 127 1.01
## group_receiverOrganization 196 1.02
## group_receiverResearcher 136 1.01
## group_receiverTeacher 100 1.02
## same 3669 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Endorsing network: build a dense sender x receiver table of interaction
# counts (one row per ordered pair, including pairs with a count of 0) and
# attach user attributes for the dyadic models that follow.
dc <- edge %>%
  filter(interaction_type == "endorsing")
g <- graph_from_data_frame(dc)
m <- as_adjacency_matrix(g, sparse = FALSE) # sender is row, receiver is column
t <- m %>%
  as.data.frame() %>%
  rownames_to_column("sender") %>%
  gather(receiver, val, -sender) %>%
  as_tibble()
tt <- add_users_data(t, users)

# One lookup for collapsing the numeric role codes into groups, shared by the
# sender and receiver columns so the two mappings cannot drift apart.
# Code 11 is the placeholder assigned below to users without a code.
group_labels <- c(
  `1` = "Teacher",
  `2` = "Administrator",
  `3` = "Administrator",
  `4` = "Researcher",
  `5` = "Other",
  `6` = "Organization",
  `7` = "Organization",
  `8` = "Other",
  `9` = "Other",
  `10` = "Other",
  `11` = "Other"
)

tt <- tt %>%
  mutate(
    code_sender = ifelse(is.na(code_sender), 11, code_sender),
    code_receiver = ifelse(is.na(code_receiver), 11, code_receiver)
  ) %>%
  # filter(code_sender != 11 & code_receiver !=11) %>%
  mutate(
    group_sender = recode(code_sender, !!!group_labels),
    group_receiver = recode(code_receiver, !!!group_labels)
  )

# "Other" is the reference level in the models.
tt$group_receiver <- fct_relevel(as.factor(tt$group_receiver), "Other")
tt$group_sender <- fct_relevel(as.factor(tt$group_sender), "Other")

# 1 if sender and receiver belong to the same group. Labels are compared as
# character because `==` on two factors raises "level sets of factors are
# different" whenever a group occurs on only one side.
tt$same <- ifelse(
  as.character(tt$group_sender) == as.character(tt$group_receiver), 1, 0
)
# Endorsing models, mirroring the conversing sequence: null, group effects,
# group effects + same-group indicator, and the full model with standardized
# account age and tweet volume.
# NOTE(review): unlike the conversing fits, these use 600 iterations and the
# default sampler control settings - check Rhat / effective sample size.

# Null model. Stored as me0 so that the group model below, assigned to me1,
# does not overwrite it.
me0 <- brm(
  formula = val ~ 1 + (1 | sender) + (1 | receiver),
  data = tt,
  family = 'poisson',
  iter = 600, chains = 3, cores = 3
)
summary(me0)

# Sender and receiver group as fixed effects.
me1 <- brm(
  formula = val ~ 1 + group_sender + group_receiver +
    (1 | sender) + (1 | receiver),
  data = tt,
  family = 'poisson',
  iter = 600, chains = 3, cores = 3
)
summary(me1)

# Adds the same-group indicator.
me2 <- brm(
  formula = val ~ 1 + group_sender + group_receiver + same +
    (1 | sender) + (1 | receiver),
  data = tt,
  family = 'poisson',
  iter = 600, chains = 3, cores = 3
)
summary(me2)

# Full model.
me3 <- brm(
  formula = val ~ 1 +
    scale(years_on_twitter_sender) + scale(n_tweets_sender) + group_sender +
    scale(years_on_twitter_receiver) + scale(n_tweets_receiver) + group_receiver +
    same +
    (1 | sender) + (1 | receiver),
  data = tt,
  family = 'poisson',
  iter = 600, chains = 3, cores = 3
)
summary(me3)
# Per-user activity in the pre period: number of distinct days with at least
# one original tweet (pre_n_days) and number of original tweets (pre_n).
# Note that orig_pre is replaced by its per-user summary here, so this chunk
# cannot be re-run without reloading the raw data.
n_days <- orig_pre %>%
  filter(!is_retweet) %>%
  mutate(screen_name = tolower(screen_name)) %>%
  distinct(screen_name, day) %>%
  count(screen_name) %>%
  rename(pre_n_days = n)

orig_pre <- orig_pre %>%
  filter(!is_retweet) %>%
  mutate(screen_name = tolower(screen_name)) %>%
  count(screen_name) %>%
  rename(pre_n = n) %>%
  left_join(n_days)

# Same two summaries for the post period (post_n_days, post_n); n_days is
# reused and orig_post is likewise replaced by its summary.
n_days <- orig_post %>%
  filter(!is_retweet) %>%
  mutate(screen_name = tolower(screen_name)) %>%
  distinct(screen_name, day) %>%
  count(screen_name) %>%
  rename(post_n_days = n)

orig_post <- orig_post %>%
  filter(!is_retweet) %>%
  mutate(screen_name = tolower(screen_name)) %>%
  count(screen_name) %>%
  rename(post_n = n) %>%
  left_join(n_days)
# One row per user with pre-period, study-period and post-period activity,
# limited to users with more than one tweet in the study period.
d_for_influence <- users %>%
  left_join(orig_pre) %>%
  left_join(orig_post) %>%
  distinct(screen_name, .keep_all = TRUE) %>%
  select(screen_name, pre_n, pre_n_days, n_tweets, post_n, post_n_days) %>%
  filter(n_tweets > 1) %>%
  # Users absent from the pre/post data have NA counts; treat those as 0.
  # Only numeric columns are touched: applying a numeric replacement to the
  # character screen_name column is a type error in current tidyr.
  mutate_if(is.numeric, replace_na, replace = 0)
# Build the per-user "exposure" measures used as predictors in the influence
# models: for each receiver, the sum over senders of
# (number of interactions from that sender) x (that sender's pre-period tweet count).

# Rename the key so the pre-period summary can be joined to the edgelist by
# sender. This mutates orig_pre in place, so the line only works once per session.
orig_pre <- rename(orig_pre, sender = screen_name)
# Exposure through endorsing interactions.
influence_endorsing <- edge %>%
filter(interaction_type == "endorsing") %>%
# n = number of endorsing interactions for each sender-receiver pair
count(sender, receiver) %>%
left_join(orig_pre) %>%
mutate(exposure = n * pre_n) %>%
group_by(receiver) %>%
# senders without pre-period tweets have NA exposure and are ignored in the sum
summarize(exposure_sum_end = sum(exposure, na.rm = TRUE)) %>%
rename(screen_name = receiver) %>%
# keep every user in d_for_influence; those never endorsed get exposure 0
right_join(d_for_influence) %>%
mutate(exposure_sum_end = replace_na(exposure_sum_end, 0)) %>%
# join keys are whatever columns the two tables share - TODO confirm they are
# limited to screen_name (and n_tweets)
left_join(users) %>%
mutate(group= ifelse(group %in% c("Other", "Unclear", "Uncoded"), "Other", group))
# Exposure through conversing interactions; same steps as above.
influence_conversing <- edge %>%
filter(interaction_type == "conversing") %>%
count(sender, receiver) %>%
left_join(orig_pre) %>%
mutate(exposure = n * pre_n) %>%
group_by(receiver) %>%
summarize(exposure_sum_conv = sum(exposure, na.rm = TRUE)) %>%
rename(screen_name = receiver) %>%
right_join(d_for_influence) %>%
mutate(exposure_sum_conv = replace_na(exposure_sum_conv, 0)) %>%
left_join(users) %>%
mutate(group= ifelse(group %in% c("Other", "Unclear", "Uncoded"), "Other", group))
# Combine both exposure measures into one table.
# NOTE(review): the join with users and the group recode were already applied
# to both inputs, so the last two steps look redundant - confirm before removing.
influence <- influence_endorsing %>%
left_join(influence_conversing) %>%
left_join(users) %>%
mutate(group = ifelse(group %in% c("Other", "Unclear", "Uncoded"), "Other", group))
# Make "Other" the first (reference) level of group.
influence <- mutate(influence, group = fct_relevel(as.factor(group), "Other"))

# Lower-triangle correlation table of the activity, exposure and account-age
# variables, formatted for printing.
corrr::fashion(
  corrr::shave(
    corrr::correlate(
      select(
        influence,
        pre_n,
        pre_n_days,
        post_n,
        post_n_days,
        n_tweets,
        n_days,
        exposure_sum_end,
        exposure_sum_conv,
        years_on_twitter
      )
    )
  )
)
## rowname pre_n pre_n_days post_n post_n_days n_tweets n_days
## 1 pre_n
## 2 pre_n_days .72
## 3 post_n .86 .56
## 4 post_n_days .40 .45 .70
## 5 n_tweets .92 .61 .94 .56
## 6 n_days .51 .63 .67 .79 .69
## 7 exposure_sum_end -.01 .01 .07 .17 .11 .22
## 8 exposure_sum_conv .04 .20 .17 .42 .21 .49
## 9 years_on_twitter .03 .08 .01 .05 .06 .12
## exposure_sum_end exposure_sum_conv years_on_twitter
## 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8 .57
## 9 .00 .18
# Collapse the numeric role code into code_category for the influence models.
# Missing codes are set to 11; codes 10 and 11 both map to "Unclear", which is
# made the reference level.
influence <- influence %>%
  mutate(
    code = ifelse(is.na(code), 11, code),
    code_category = recode(
      code,
      `1` = "Teacher",
      `2` = "Administrator",
      `3` = "Administrator",
      `4` = "Researcher",
      `5` = "Other",
      `8` = "Other",
      `9` = "Other",
      `10` = "Unclear",
      `6` = "Organization",
      `7` = "Organization",
      `11` = "Unclear"
    ),
    code_category = fct_relevel(as.factor(code_category), "Unclear")
  )
# Null Poisson model for the number of post-period original tweets.
m0 <- glm(
  formula = post_n ~ 1,
  family = 'poisson',
  data = influence
)
summary(m0)
##
## Call:
## glm(formula = post_n ~ 1, family = "poisson", data = influence)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.982 -6.982 -6.982 -2.469 74.711
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.19339 0.01286 248.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 23423 on 247 degrees of freedom
## Residual deviance: 23423 on 247 degrees of freedom
## AIC: 23934
##
## Number of Fisher Scoring iterations: 7
# Adds role category ("Unclear" is the reference level).
m1 <- glm(
  formula = post_n ~ 1 + code_category,
  family = 'poisson',
  data = influence
)
summary(m1)
##
## Call:
## glm(formula = post_n ~ 1 + code_category, family = "poisson",
## data = influence)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -9.530 -7.134 -4.979 -2.482 66.041
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.15145 0.08575 13.43 <2e-16 ***
## code_categoryAdministrator 2.66439 0.08814 30.23 <2e-16 ***
## code_categoryOrganization 1.96035 0.10381 18.88 <2e-16 ***
## code_categoryOther 1.36567 0.10117 13.50 <2e-16 ***
## code_categoryResearcher 2.16806 0.09737 22.27 <2e-16 ***
## code_categoryTeacher 2.08514 0.08815 23.65 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 23423 on 247 degrees of freedom
## Residual deviance: 21174 on 242 degrees of freedom
## AIC: 21695
##
## Number of Fisher Scoring iterations: 7
# Adds standardized study-period activity (tweets, active days) and account age.
# NOTE(review): the next model in this file is also assigned to m2 and
# replaces this fit.
m2 <- glm(
  formula = post_n ~ 1 +
    scale(n_tweets) +
    scale(years_on_twitter) +
    scale(n_days) +
    code_category,
  family = 'poisson',
  data = influence
)
summary(m2)
##
## Call:
## glm(formula = post_n ~ 1 + scale(n_tweets) + scale(years_on_twitter) +
## scale(n_days) + code_category, family = "poisson", data = influence)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -9.5813 -3.9681 -3.0296 0.0528 22.4589
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.439307 0.085995 16.737 < 2e-16 ***
## scale(n_tweets) 0.121054 0.004479 27.030 < 2e-16 ***
## scale(years_on_twitter) -0.070628 0.016677 -4.235 2.28e-05 ***
## scale(n_days) 0.748193 0.010513 71.169 < 2e-16 ***
## code_categoryAdministrator 1.090255 0.091630 11.898 < 2e-16 ***
## code_categoryOrganization 0.792991 0.106839 7.422 1.15e-13 ***
## code_categoryOther 1.195105 0.101332 11.794 < 2e-16 ***
## code_categoryResearcher 1.099022 0.103011 10.669 < 2e-16 ***
## code_categoryTeacher 0.871716 0.090943 9.585 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 23423.1 on 247 degrees of freedom
## Residual deviance: 5733.8 on 239 degrees of freedom
## AIC: 6260.6
##
## Number of Fisher Scoring iterations: 7
# Baseline model: the activity covariates plus standardized pre-period tweet
# count. The exposure models that follow extend this specification.
m2 <- glm(
  formula = post_n ~ 1 +
    scale(years_on_twitter) +
    scale(n_tweets) +
    scale(n_days) +
    scale(pre_n) +
    code_category,
  family = 'poisson',
  data = influence
)
summary(m2)
##
## Call:
## glm(formula = post_n ~ 1 + scale(years_on_twitter) + scale(n_tweets) +
## scale(n_days) + scale(pre_n) + code_category, family = "poisson",
## data = influence)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.665 -3.872 -2.704 -0.306 21.250
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.44143 0.08600 16.761 < 2e-16 ***
## scale(years_on_twitter) -0.05316 0.01673 -3.178 0.00148 **
## scale(n_tweets) 0.49234 0.03568 13.798 < 2e-16 ***
## scale(n_days) 0.64336 0.01448 44.436 < 2e-16 ***
## scale(pre_n) -0.31536 0.02993 -10.537 < 2e-16 ***
## code_categoryAdministrator 1.03717 0.09199 11.275 < 2e-16 ***
## code_categoryOrganization 0.69980 0.10749 6.510 7.5e-11 ***
## code_categoryOther 1.17279 0.10139 11.567 < 2e-16 ***
## code_categoryResearcher 0.99286 0.10351 9.592 < 2e-16 ***
## code_categoryTeacher 0.92756 0.09095 10.199 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 23423.1 on 247 degrees of freedom
## Residual deviance: 5619.2 on 238 degrees of freedom
## AIC: 6148.1
##
## Number of Fisher Scoring iterations: 7
# Baseline covariates plus exposure through conversing interactions
# (exposure enters unscaled, hence the very small coefficient).
m3 <- glm(
  formula = post_n ~ 1 +
    scale(years_on_twitter) +
    scale(n_tweets) +
    scale(n_days) +
    scale(pre_n) +
    code_category +
    exposure_sum_conv,
  family = 'poisson',
  data = influence
)
summary(m3)
##
## Call:
## glm(formula = post_n ~ 1 + scale(years_on_twitter) + scale(n_tweets) +
## scale(n_days) + scale(pre_n) + code_category + exposure_sum_conv,
## family = "poisson", data = influence)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.7126 -3.8042 -2.7984 -0.4321 21.9750
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.405e+00 8.608e-02 16.320 < 2e-16 ***
## scale(years_on_twitter) -9.346e-02 1.692e-02 -5.524 3.31e-08 ***
## scale(n_tweets) 5.067e-01 3.503e-02 14.466 < 2e-16 ***
## scale(n_days) 6.092e-01 1.463e-02 41.632 < 2e-16 ***
## scale(pre_n) -3.070e-01 2.946e-02 -10.421 < 2e-16 ***
## code_categoryAdministrator 9.683e-01 9.248e-02 10.470 < 2e-16 ***
## code_categoryOrganization 7.917e-01 1.079e-01 7.339 2.15e-13 ***
## code_categoryOther 1.190e+00 1.014e-01 11.734 < 2e-16 ***
## code_categoryResearcher 7.916e-01 1.054e-01 7.513 5.77e-14 ***
## code_categoryTeacher 9.053e-01 9.108e-02 9.940 < 2e-16 ***
## exposure_sum_conv 1.643e-05 1.394e-06 11.785 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 23423.1 on 247 degrees of freedom
## Residual deviance: 5487.4 on 237 degrees of freedom
## AIC: 6018.3
##
## Number of Fisher Scoring iterations: 7
# Baseline covariates plus exposure through endorsing interactions
# (exposure enters unscaled).
m4 <- glm(
  formula = post_n ~ 1 +
    scale(years_on_twitter) +
    scale(n_tweets) +
    scale(n_days) +
    scale(pre_n) +
    code_category +
    exposure_sum_end,
  family = 'poisson',
  data = influence
)
summary(m4)
##
## Call:
## glm(formula = post_n ~ 1 + scale(years_on_twitter) + scale(n_tweets) +
## scale(n_days) + scale(pre_n) + code_category + exposure_sum_end,
## family = "poisson", data = influence)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -9.2262 -3.7847 -2.7177 -0.1993 21.9250
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.447e+00 8.598e-02 16.826 < 2e-16 ***
## scale(years_on_twitter) -3.872e-02 1.679e-02 -2.306 0.0211 *
## scale(n_tweets) 4.984e-01 3.566e-02 13.976 < 2e-16 ***
## scale(n_days) 6.411e-01 1.439e-02 44.552 < 2e-16 ***
## scale(pre_n) -3.160e-01 2.998e-02 -10.540 < 2e-16 ***
## code_categoryAdministrator 9.686e-01 9.266e-02 10.454 < 2e-16 ***
## code_categoryOrganization 6.892e-01 1.076e-01 6.403 1.53e-10 ***
## code_categoryOther 1.166e+00 1.014e-01 11.497 < 2e-16 ***
## code_categoryResearcher 8.324e-01 1.060e-01 7.851 4.14e-15 ***
## code_categoryTeacher 8.983e-01 9.112e-02 9.859 < 2e-16 ***
## exposure_sum_end 5.807e-04 7.432e-05 7.814 5.54e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 23423.1 on 247 degrees of freedom
## Residual deviance: 5562.8 on 237 degrees of freedom
## AIC: 6093.6
##
## Number of Fisher Scoring iterations: 7
# Baseline covariates plus both exposure measures entered together.
# NOTE(review): the rendered summary below reports a residual deviance of
# about 5487 on 236 degrees of freedom, which points to strong overdispersion
# relative to the Poisson assumption - consider a quasi-Poisson or negative
# binomial specification before relying on these standard errors.
m5 <- glm(
  formula = post_n ~ 1 +
    scale(years_on_twitter) +
    scale(n_tweets) +
    scale(n_days) +
    scale(pre_n) +
    code_category +
    exposure_sum_conv +
    exposure_sum_end,
  family = 'poisson',
  data = influence
)
summary(m5)
##
## Call:
## glm(formula = post_n ~ 1 + scale(years_on_twitter) + scale(n_tweets) +
## scale(n_days) + scale(pre_n) + code_category + exposure_sum_conv +
## exposure_sum_end, family = "poisson", data = influence)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.7508 -3.7990 -2.7934 -0.4234 21.9990
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.406e+00 8.616e-02 16.320 < 2e-16 ***
## scale(years_on_twitter) -9.139e-02 1.790e-02 -5.105 3.30e-07 ***
## scale(n_tweets) 5.071e-01 3.507e-02 14.460 < 2e-16 ***
## scale(n_days) 6.097e-01 1.471e-02 41.451 < 2e-16 ***
## scale(pre_n) -3.077e-01 2.953e-02 -10.418 < 2e-16 ***
## code_categoryAdministrator 9.656e-01 9.279e-02 10.406 < 2e-16 ***
## code_categoryOrganization 7.888e-01 1.082e-01 7.289 3.12e-13 ***
## code_categoryOther 1.189e+00 1.014e-01 11.722 < 2e-16 ***
## code_categoryResearcher 7.870e-01 1.062e-01 7.411 1.26e-13 ***
## code_categoryTeacher 9.043e-01 9.112e-02 9.924 < 2e-16 ***
## exposure_sum_conv 1.601e-05 1.828e-06 8.760 < 2e-16 ***
## exposure_sum_end 3.540e-05 1.003e-04 0.353 0.724
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 23423.1 on 247 degrees of freedom
## Residual deviance: 5487.2 on 236 degrees of freedom
## AIC: 6020.1
##
## Number of Fisher Scoring iterations: 7
# Record the R version, platform and package versions used for this run.
sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] brms_2.9.0 Rcpp_1.0.1 rTAGS_0.1.0
## [4] googlesheets_0.3.0 rtweet_0.6.9 sjstats_0.17.5
## [7] lme4_1.1-21 Matrix_1.2-17 igraph_1.2.4.1
## [10] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.1
## [13] purrr_0.3.2 readr_1.3.1 tidyr_0.8.3
## [16] tibble_2.1.3 ggplot2_3.1.1 tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] TH.data_1.0-10 minqa_1.2.4 colorspace_1.4-1
## [4] ggridges_0.5.1 rsconnect_0.8.13 sjlabelled_1.1.0
## [7] estimability_1.3 markdown_1.0 base64enc_0.1-3
## [10] rstudioapi_0.10 rstan_2.18.2 DT_0.6
## [13] fansi_0.4.0 mvtnorm_1.0-10 lubridate_1.7.4
## [16] xml2_1.2.0 bridgesampling_0.6-0 codetools_0.2-16
## [19] splines_3.5.3 knitr_1.23 shinythemes_1.1.2
## [22] sjmisc_2.7.9 zeallot_0.1.0 bayesplot_1.7.0
## [25] jsonlite_1.6 nloptr_1.2.1 broom_0.5.2
## [28] shiny_1.3.2 compiler_3.5.3 httr_1.4.0
## [31] emmeans_1.3.4 backports_1.1.4 assertthat_0.2.1
## [34] lazyeval_0.2.2 cli_1.1.0 later_0.8.0
## [37] prettyunits_1.0.2 htmltools_0.3.6 tools_3.5.3
## [40] coda_0.19-2 gtable_0.3.0 glue_1.3.1
## [43] corrr_0.3.2 reshape2_1.4.3 cellranger_1.1.0
## [46] vctrs_0.1.0 nlme_3.1-140 crosstalk_1.0.0
## [49] insight_0.3.0 xfun_0.7 ps_1.3.0
## [52] rvest_0.3.4 mime_0.6 miniUI_0.1.1.1
## [55] gtools_3.8.1 MASS_7.3-51.4 zoo_1.8-6
## [58] scales_1.0.0 colourpicker_1.0 hms_0.4.2
## [61] promises_1.0.1 Brobdingnag_1.2-6 parallel_3.5.3
## [64] sandwich_2.5-1 inline_0.3.15 shinystan_2.5.0
## [67] yaml_2.2.0 gridExtra_2.3 StanHeaders_2.18.1
## [70] loo_2.1.0 stringi_1.4.3 bayestestR_0.2.0
## [73] dygraphs_1.1.1.6 pkgbuild_1.0.3 boot_1.3-22
## [76] rlang_0.3.4 pkgconfig_2.0.2 matrixStats_0.54.0
## [79] evaluate_0.14 lattice_0.20-38 labeling_0.3
## [82] rstantools_1.5.1 htmlwidgets_1.3 processx_3.3.1
## [85] tidyselect_0.2.5 plyr_1.8.4 magrittr_1.5
## [88] R6_2.4.0 generics_0.0.2 multcomp_1.4-10
## [91] pillar_1.4.1 haven_2.1.0 withr_2.1.2
## [94] xts_0.11-2 survival_2.44-1.1 abind_1.4-5
## [97] performance_0.2.0 modelr_0.1.4 crayon_1.3.4
## [100] utf8_1.1.4 rmarkdown_1.13 grid_3.5.3
## [103] readxl_1.3.1 callr_3.2.0 threejs_0.3.1
## [106] digest_0.6.19 xtable_1.8-4 httpuv_1.5.1
## [109] stats4_3.5.3 munsell_0.5.0 shinyjs_1.0