loading, setting up

library(tidyverse)
library(lme4)

# Read the prepared modeling data; the path is relative, so it is resolved
# against the current working directory.
d <- readr::read_rds(file = "ngsschat-data-for-modeling.rds")

Descriptive analysis

overall

# Overall sentiment balance: tally tweets per senti_binary value, place the two
# tallies side by side, and divide the first by the second.
# NOTE(review): "neg"/"pos" are assigned by column position, so this assumes
# the senti_binary values sort negative-first -- confirm.
d %>%
  count(senti_binary) %>%
  spread(key = senti_binary, value = n) %>%
  set_names(nm = c("neg", "pos")) %>%
  mutate(senti_ratio = neg / pos)
## # A tibble: 1 x 3
##     neg    pos senti_ratio
##   <int>  <int>       <dbl>
## 1 54554 516824       0.106

by year

# Sentiment ratio per year, drawn as points joined by a line with a smoother
# on top. Years where one of the two counts is missing yield an NA ratio and
# are dropped by ggplot2 with a warning.
# NOTE(review): "neg"/"pos" are assigned by column position -- confirm the
# senti_binary values sort negative-first.
d %>%
  count(year, senti_binary) %>%
  spread(key = senti_binary, value = n) %>%
  set_names(nm = c("year", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos) %>%
  ggplot(mapping = aes(x = year, y = senti_ratio)) +
  geom_point() +
  geom_line() +
  geom_smooth()
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 row(s) containing missing values (geom_path).

by tweet type

# Sentiment ratio for each type_of_tweet category.
# NOTE(review): "neg"/"pos" are assigned by column position -- confirm the
# senti_binary values sort negative-first.
d %>%
  count(type_of_tweet, senti_binary) %>%
  spread(key = senti_binary, value = n) %>%
  set_names(nm = c("type_of_tweet", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos)
## # A tibble: 3 x 4
##   type_of_tweet       neg    pos senti_ratio
##   <chr>             <int>  <int>       <dbl>
## 1 ngsschat-chat      7727  59363      0.130 
## 2 ngsschat-non-chat 11131  92824      0.120 
## 3 non-ngsschat      35696 364637      0.0979

by state

# Sentiment ratio per state as horizontal bars, with states ordered along the
# axis by their ratio (via reorder()).
# NOTE(review): "neg"/"pos" are assigned by column position -- confirm the
# senti_binary values sort negative-first.
d %>%
  count(state, senti_binary) %>%
  spread(key = senti_binary, value = n) %>%
  set_names(nm = c("state", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos) %>%
  arrange(desc(senti_ratio)) %>%
  ggplot(mapping = aes(x = reorder(state, senti_ratio), y = senti_ratio)) +
  geom_col() +
  coord_flip()
## Warning: Removed 1 rows containing missing values (position_stack).

by state, by year

# Sentiment ratio per state, restricted to 2012 onward, with one facet per
# year. State/year cells lacking one of the two counts give an NA ratio and
# are dropped by ggplot2 with a warning.
# NOTE(review): "neg"/"pos" are assigned by column position -- confirm the
# senti_binary values sort negative-first.
d %>%
  filter(year >= 2012) %>%
  count(state, year, senti_binary) %>%
  spread(key = senti_binary, value = n) %>%
  set_names(nm = c("state", "year", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos) %>%
  arrange(desc(senti_ratio)) %>%
  ggplot(mapping = aes(x = reorder(state, senti_ratio), y = senti_ratio)) +
  geom_col() +
  coord_flip() +
  facet_wrap(~year)
## Warning: Removed 21 rows containing missing values (position_stack).

by adoption status

# Sentiment ratio for each adoption_key value (including the NA group),
# listed from highest to lowest ratio.
# NOTE(review): "neg"/"pos" are assigned by column position -- confirm the
# senti_binary values sort negative-first.
d %>%
  count(adoption_key, senti_binary) %>%
  spread(key = senti_binary, value = n) %>%
  set_names(nm = c("adoption_status", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos) %>%
  arrange(desc(senti_ratio))
## # A tibble: 5 x 4
##   adoption_status   neg    pos senti_ratio
##   <chr>           <int>  <int>       <dbl>
## 1 after_adoption   6748  55284      0.122 
## 2 before_adoption 14199 126678      0.112 
## 3 near_adoption   11064 104283      0.106 
## 4 no_adoption      2451  24166      0.101 
## 5 <NA>            20092 206413      0.0973

by adoption status, by year

# Sentiment ratio per adoption status and year (2012 onward), one facet per
# year, as horizontal bars ordered by ratio.
# NOTE(review): "neg"/"pos" are assigned by column position -- confirm the
# senti_binary values sort negative-first.
d %>%
  filter(year >= 2012) %>%
  count(adoption_key, year, senti_binary) %>%
  spread(senti_binary, n) %>%
  # The first column holds adoption_key values, not states; name it
  # accordingly so the column and the axis label are not misleading.
  set_names(c("adoption_status", "year", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos) %>%
  arrange(desc(senti_ratio)) %>%
  ggplot(aes(x = reorder(adoption_status, senti_ratio), y = senti_ratio)) +
  geom_col() +
  coord_flip() +
  facet_wrap(~year)

Models

Just modeling the effect of state

# Baseline: intercept-only model of senti_scale with a random intercept for
# each state.
m0 <- lmer(
  senti_scale ~ 1 + (1 | state),
  data = d
)

# Summary table for m0 (fixed effects, variance components, R2).
sjPlot::tab_model(m0)
  senti scale
Predictors Estimates CI p
(Intercept) 5.68 5.64 – 5.72 <0.001
Random Effects
σ2 1.32
τ00 state 0.02
ICC 0.01
N state 53
Observations 338057
Marginal R2 / Conditional R2 0.000 / 0.015
# Intraclass correlation for m0, reported separately per grouping factor.
performance::icc(m0, by_group = TRUE)
## # ICC by Group
## 
## Group |   ICC
## -------------
## state | 0.015

Just modeling the effect of screen_name

# Intercept-only model of senti_scale with a random intercept for each
# screen_name.
m1 <- lmer(
  senti_scale ~ 1 + (1 | screen_name),
  data = d
)

# Summary table for m1.
sjPlot::tab_model(m1)
  senti scale
Predictors Estimates CI p
(Intercept) 5.72 5.71 – 5.73 <0.001
Random Effects
σ2 1.15
τ00 screen_name 0.38
ICC 0.25
N screen_name 76235
Observations 536718
Marginal R2 / Conditional R2 0.000 / 0.248
# Intraclass correlation for m1, reported separately per grouping factor.
performance::icc(m1, by_group = TRUE)
## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.248

Both state and screen name

# Intercept-only model with two random intercepts: one per state and one per
# screen_name.
m2 <- lmer(
  senti_scale ~ 1 + (1 | state) + (1 | screen_name),
  data = d
)

# Summary table for m2.
sjPlot::tab_model(m2)
  senti scale
Predictors Estimates CI p
(Intercept) 5.69 5.66 – 5.72 <0.001
Random Effects
σ2 1.18
τ00 screen_name 0.29
τ00 state 0.01
ICC 0.20
N state 53
N screen_name 39520
Observations 338057
Marginal R2 / Conditional R2 0.000 / 0.204
# Intraclass correlation for m2, reported separately per grouping factor.
performance::icc(m2, by_group = TRUE)
## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.197
## state       | 0.007

Adding whether the tweets included ngsschat or not

# Extends the two-random-intercept model with type_of_tweet as a fixed effect.
m4 <- lmer(
  senti_scale ~ 1 + type_of_tweet + (1 | state) + (1 | screen_name),
  data = d
)

# Summary table for m4, with the ICC row requested explicitly.
sjPlot::tab_model(m4, show.icc = TRUE)
  senti scale
Predictors Estimates CI p
(Intercept) 5.65 5.62 – 5.68 <0.001
type_of_tweet
[ngsschat-non-chat]
0.08 0.07 – 0.09 <0.001
type_of_tweet
[non-ngsschat]
0.04 0.02 – 0.05 <0.001
Random Effects
σ2 1.18
τ00 screen_name 0.29
τ00 state 0.01
ICC 0.20
N state 53
N screen_name 39520
Observations 338057
Marginal R2 / Conditional R2 0.000 / 0.204
# Intraclass correlation for m4, reported separately per grouping factor.
performance::icc(m4, by_group = TRUE)
## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.197
## state       | 0.007

Adding year as a factor

# Adds year_fct as a fixed effect alongside type_of_tweet, keeping the random
# intercepts for state and screen_name.
# NOTE(review): the rendered table has no 2016 row, so 2016 is presumably the
# reference level of year_fct -- confirm.
m5 <- lmer(
  senti_scale ~ 1 + type_of_tweet + year_fct + (1 | state) + (1 | screen_name),
  data = d
)

# Summary table for m5, with the ICC row requested explicitly.
sjPlot::tab_model(m5, show.icc = TRUE)
  senti scale
Predictors Estimates CI p
(Intercept) 5.55 5.52 – 5.58 <0.001
type_of_tweet
[ngsschat-non-chat]
0.06 0.05 – 0.08 <0.001
type_of_tweet
[non-ngsschat]
0.04 0.03 – 0.05 <0.001
year_fct [2009] -0.19 -1.24 – 0.85 0.720
year_fct [2010] -0.65 -1.24 – -0.07 0.028
year_fct [2011] -0.26 -0.46 – -0.05 0.016
year_fct [2012] -0.24 -0.29 – -0.19 <0.001
year_fct [2013] -0.17 -0.19 – -0.15 <0.001
year_fct [2014] -0.13 -0.15 – -0.11 <0.001
year_fct [2015] -0.04 -0.05 – -0.02 <0.001
year_fct [2017] 0.07 0.06 – 0.09 <0.001
year_fct [2018] 0.26 0.25 – 0.27 <0.001
year_fct [2019] 0.29 0.28 – 0.31 <0.001
year_fct [2020] 0.33 0.31 – 0.35 <0.001
Random Effects
σ2 1.18
τ00 screen_name 0.25
τ00 state 0.01
ICC 0.18
N state 53
N screen_name 39520
Observations 338057
Marginal R2 / Conditional R2 0.019 / 0.195
# Intraclass correlation for m5, reported separately per grouping factor.
performance::icc(m5, by_group = TRUE)
## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.174
## state       | 0.006

Adding year (mean-centered, not rescaled) as a slope

# Replaces the categorical year term with a single linear year term.
# scale(year, scale = FALSE) subtracts the mean of year without dividing by
# its standard deviation, so the slope is per calendar year.
m6 <- lmer(
  senti_scale ~ 1 + type_of_tweet + scale(year, scale = FALSE) +
    (1 | state) + (1 | screen_name),
  data = d
)

# Summary table for m6, with the ICC row requested explicitly.
sjPlot::tab_model(m6, show.icc = TRUE)
  senti scale
Predictors Estimates CI p
(Intercept) 5.65 5.62 – 5.68 <0.001
type_of_tweet
[ngsschat-non-chat]
0.06 0.04 – 0.07 <0.001
type_of_tweet
[non-ngsschat]
0.04 0.02 – 0.05 <0.001
scale(year, scale =
FALSE)
0.08 0.08 – 0.08 <0.001
Random Effects
σ2 1.16
τ00 screen_name 0.25
τ00 state 0.01
ICC 0.18
N state 52
N screen_name 40479
Observations 344994
Marginal R2 / Conditional R2 0.018 / 0.193
# Intraclass correlation for m6, reported separately per grouping factor.
performance::icc(m6, by_group = TRUE)
## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.174
## state       | 0.005

Adding year as a random effect

# Treats year as a third grouping factor (random intercept per year) next to
# state and screen_name; type_of_tweet stays as the only fixed effect.
m7 <- lmer(
  senti_scale ~ 1 + type_of_tweet + (1 | year) + (1 | state) +
    (1 | screen_name),
  data = d
)

# Summary table for m7, with the ICC row requested explicitly.
sjPlot::tab_model(m7, show.icc = TRUE)
  senti scale
Predictors Estimates CI p
(Intercept) 5.54 5.40 – 5.68 <0.001
type_of_tweet
[ngsschat-non-chat]
0.06 0.05 – 0.08 <0.001
type_of_tweet
[non-ngsschat]
0.04 0.03 – 0.05 <0.001
Random Effects
σ2 1.18
τ00 screen_name 0.25
τ00 state 0.01
τ00 year 0.05
ICC 0.21
N year 12
N state 53
N screen_name 39520
Observations 338057
Marginal R2 / Conditional R2 0.000 / 0.207
# Intraclass correlation for m7, reported separately per grouping factor.
performance::icc(m7, by_group = TRUE)
## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.168
## state       | 0.005
## year        | 0.034

Adding adoption status

# Adds adoption_key, lead, modified and time_on_twitter to the fixed effects,
# keeping the random intercepts for state and screen_name.
# NOTE(review): unlike the other models in this section, this formula drops
# the intercept (-1) and uses year_centered rather than
# scale(year, scale = FALSE) -- confirm both choices are intentional.
m8 <- lmer(
  senti_scale ~ -1 + type_of_tweet + year_centered + adoption_key + lead +
    modified + time_on_twitter + (1 | state) + (1 | screen_name),
  data = d
)

# Coefficient plot for m8, styled with the hrbrthemes ipsum theme.
sjPlot::plot_model(m8) + hrbrthemes::theme_ipsum()

# Intraclass correlation for m8, reported separately per grouping factor.
performance::icc(m8, by_group = TRUE)
## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.210
## state       | 0.006
# Summary table for m8 with standard errors and the ICC row; the table is
# also saved to 'out1.doc' (relative to the working directory).
# NOTE(review): the rendered table shows ICC 0.00 and equal marginal and
# conditional R2, while performance::icc() reports 0.210 for screen_name;
# presumably an artifact of the no-intercept formula -- verify before
# reporting these figures.
sjPlot::tab_model(m8, show.icc = TRUE, show.se = TRUE, file = 'out1.doc')
  senti scale
Predictors Estimates std. Error CI p
type_of_tweet
[ngsschat-chat]
5.69 0.03 5.63 – 5.75 <0.001
type_of_tweet
[ngsschat-non-chat]
5.74 0.03 5.68 – 5.81 <0.001
type_of_tweet
[non-ngsschat]
5.73 0.03 5.66 – 5.79 <0.001
year_centered 0.09 0.00 0.09 – 0.09 <0.001
adoption_key
[before_adoption]
0.06 0.03 -0.00 – 0.12 0.069
adoption_key
[near_adoption]
0.01 0.03 -0.05 – 0.07 0.764
adoption_key
[no_adoption]
-0.06 0.05 -0.16 – 0.03 0.181
lead -0.00 0.03 -0.06 – 0.05 0.902
modified 0.03 0.03 -0.02 – 0.09 0.228
time_on_twitter -0.02 0.00 -0.02 – -0.01 <0.001
Random Effects
σ2 1.16
τ00 screen_name 0.24
τ00 state 0.01
ICC 0.00
N state 51
N screen_name 40387
Observations 344873
Marginal R2 / Conditional R2 0.026 / 0.026

Adding other state-level variables

# Mean-centered year plus lead, modified and type_of_tweet as fixed effects,
# with random intercepts for state and screen_name.
m10 <- lmer(
  senti_scale ~ 1 + scale(year, scale = FALSE) + lead + modified +
    type_of_tweet + (1 | state) + (1 | screen_name),
  data = d
)

# Summary table for m10, with the ICC row requested explicitly.
sjPlot::tab_model(m10, show.icc = TRUE)
  senti scale
Predictors Estimates CI p
(Intercept) 5.63 5.58 – 5.68 <0.001
scale(year, scale =
FALSE)
0.08 0.08 – 0.08 <0.001
lead 0.01 -0.05 – 0.06 0.803
modified 0.04 -0.02 – 0.10 0.192
type_of_tweet
[ngsschat-non-chat]
0.06 0.04 – 0.07 <0.001
type_of_tweet
[non-ngsschat]
0.04 0.02 – 0.05 <0.001
Random Effects
σ2 1.16
τ00 screen_name 0.25
τ00 state 0.01
ICC 0.18
N state 51
N screen_name 40387
Observations 344873
Marginal R2 / Conditional R2 0.018 / 0.194
# Intraclass correlation for m10, reported separately per grouping factor.
performance::icc(m10, by_group = TRUE)
## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.174
## state       | 0.005

ignoring state because of missing data

# Same fixed effects as the year-slope model (mean-centered year and
# type_of_tweet) but with screen_name as the only grouping factor; no state
# term appears in the formula.
m11 <- lmer(
  senti_scale ~ 1 + scale(year, scale = FALSE) + type_of_tweet +
    (1 | screen_name),
  data = d
)

# Summary table for m11, with the ICC row requested explicitly.
sjPlot::tab_model(m11, show.icc = TRUE)
  senti scale
Predictors Estimates CI p
(Intercept) 5.65 5.64 – 5.66 <0.001
scale(year, scale =
FALSE)
0.08 0.08 – 0.08 <0.001
type_of_tweet
[ngsschat-non-chat]
0.07 0.06 – 0.08 <0.001
type_of_tweet
[non-ngsschat]
0.04 0.03 – 0.06 <0.001
Random Effects
σ2 1.10
τ00 screen_name 0.29
ICC 0.21
N screen_name 87818
Observations 571378
Marginal R2 / Conditional R2 0.018 / 0.221
# Intraclass correlation for m11, reported separately per grouping factor.
performance::icc(m11, by_group = TRUE)
## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.206

ignoring screen name to understand potential bias from not doing so

# Ordinary linear model with the same fixed effects as m11 (mean-centered
# year and type_of_tweet) and no grouping term at all.
m12 <- lm(
  senti_scale ~ 1 + scale(year, scale = FALSE) + type_of_tweet,
  data = d
)

# Summary table for m12.
# NOTE(review): show.icc = TRUE is passed, but the rendered table for this lm
# fit contains no ICC row, so the argument appears to have no effect here.
sjPlot::tab_model(m12, show.icc = TRUE)
  senti scale
Predictors Estimates CI p
(Intercept) 5.78 5.77 – 5.79 <0.001
scale(year, scale =
FALSE)
0.07 0.07 – 0.07 <0.001
type_of_tweet
[ngsschat-non-chat]
-0.06 -0.07 – -0.05 <0.001
type_of_tweet
[non-ngsschat]
-0.08 -0.09 – -0.07 <0.001
Observations 571378
R2 / R2 adjusted 0.015 / 0.015