loading, setting up

library(tidyverse)
library(lme4)

# Load the prepared modeling data set; every analysis below reads from `d`.
d <- read_rds(file = "ngsschat-data-for-modeling.rds")

Descriptive analysis

overall

# Overall tweet counts per sentiment class and the neg/pos ratio.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# value-sorted column order that spread() produced, which matters because
# set_names() labels the columns purely by position.
# NOTE(review): assumes senti_binary has exactly two non-missing values and
# that the one sorting first is the negative class -- confirm.
d %>%
  count(senti_binary) %>%
  pivot_wider(
    names_from = senti_binary,
    values_from = n,
    names_sort = TRUE
  ) %>%
  set_names(c("neg", "pos")) %>%
  mutate(senti_ratio = neg / pos)

## # A tibble: 1 x 3
##     neg    pos senti_ratio
##   <int>  <int>       <dbl>
## 1 54554 516824       0.106

by year

# Neg/pos ratio per year, plotted as points, a connecting line, and a smoother.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# value-sorted column order, which the positional set_names() relies on.
# Years lacking one of the two sentiment classes get an NA count and hence an
# NA ratio, which ggplot drops with a warning.
d %>%
  count(year, senti_binary) %>%
  pivot_wider(
    names_from = senti_binary,
    values_from = n,
    names_sort = TRUE
  ) %>%
  set_names(c("year", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos) %>%
  ggplot(aes(x = year, y = senti_ratio)) +
  geom_point() +
  geom_line() +
  geom_smooth()

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 row(s) containing missing values (geom_path).

by tweet type

# Neg/pos ratio per tweet type.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# value-sorted column order, which the positional set_names() relies on.
d %>%
  count(type_of_tweet, senti_binary) %>%
  pivot_wider(
    names_from = senti_binary,
    values_from = n,
    names_sort = TRUE
  ) %>%
  set_names(c("type_of_tweet", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos)

## # A tibble: 3 x 4
##   type_of_tweet       neg    pos senti_ratio
##   <chr>             <int>  <int>       <dbl>
## 1 ngsschat-chat      7727  59363      0.130 
## 2 ngsschat-non-chat 11131  92824      0.120 
## 3 non-ngsschat      35696 364637      0.0979

by state

# Neg/pos ratio per state as a horizontal bar chart, bars ordered by ratio.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# value-sorted column order, which the positional set_names() relies on.
d %>%
  count(state, senti_binary) %>%
  pivot_wider(
    names_from = senti_binary,
    values_from = n,
    names_sort = TRUE
  ) %>%
  set_names(c("state", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos) %>%
  arrange(desc(senti_ratio)) %>%
  ggplot(aes(x = reorder(state, senti_ratio), y = senti_ratio)) +
  geom_col() +
  coord_flip()

## Warning: Removed 1 rows containing missing values (position_stack).

## by state, by year
# Same state-level ratio, restricted to 2012 onward and faceted by year.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# value-sorted column order, which the positional set_names() relies on.
# State/year cells lacking one sentiment class yield an NA ratio and are
# dropped by ggplot with a warning.

d %>%
  filter(year >= 2012) %>%
  count(state, year, senti_binary) %>%
  pivot_wider(
    names_from = senti_binary,
    values_from = n,
    names_sort = TRUE
  ) %>%
  set_names(c("state", "year", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos) %>%
  arrange(desc(senti_ratio)) %>%
  ggplot(aes(x = reorder(state, senti_ratio), y = senti_ratio)) +
  geom_col() +
  coord_flip() +
  facet_wrap(~year)

## Warning: Removed 21 rows containing missing values (position_stack).

by adoption status

# Neg/pos ratio per adoption status, highest ratio first.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# value-sorted column order, which the positional set_names() relies on.
d %>%
  count(adoption_key, senti_binary) %>%
  pivot_wider(
    names_from = senti_binary,
    values_from = n,
    names_sort = TRUE
  ) %>%
  set_names(c("adoption_status", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos) %>%
  arrange(desc(senti_ratio))

## # A tibble: 5 x 4
##   adoption_status   neg    pos senti_ratio
##   <chr>           <int>  <int>       <dbl>
## 1 after_adoption   6748  55284      0.122 
## 2 before_adoption 14199 126678      0.112 
## 3 near_adoption   11064 104283      0.106 
## 4 no_adoption      2451  24166      0.101 
## 5 <NA>            20092 206413      0.0973

by adoption status by year

# Neg/pos ratio per adoption status, restricted to 2012 onward and faceted by
# year.
# The grouping column is adoption_key, so it is labelled adoption_status here
# (the earlier version reused the name "state" from the state-level chunk,
# which mislabelled the plot axis).
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# value-sorted column order, which the positional set_names() relies on.
d %>%
  filter(year >= 2012) %>%
  count(adoption_key, year, senti_binary) %>%
  pivot_wider(
    names_from = senti_binary,
    values_from = n,
    names_sort = TRUE
  ) %>%
  set_names(c("adoption_status", "year", "neg", "pos")) %>%
  mutate(senti_ratio = neg / pos) %>%
  arrange(desc(senti_ratio)) %>%
  ggplot(aes(x = reorder(adoption_status, senti_ratio), y = senti_ratio)) +
  geom_col() +
  coord_flip() +
  facet_wrap(~year)

Models

Just modeling the effect of state

# Intercept-only model with a random intercept for state.
m0 <- lmer(
  senti_scale ~ 1 + (1 | state),
  data = d
)

sjPlot::tab_model(m0)

	senti scale
Predictors	Estimates	CI	p
(Intercept)	5.68	5.64 – 5.72	<0.001
Random Effects
σ²	1.32
τ₀₀ _state	0.02
ICC	0.01
N _state	53
Observations	338057
Marginal R² / Conditional R²	0.000 / 0.015

# ICC reported separately for each grouping factor in m0.
performance::icc(model = m0, by_group = TRUE)

## # ICC by Group
## 
## Group |   ICC
## -------------
## state | 0.015

Just modeling the effect of screen_name

# Intercept-only model with a random intercept for screen_name.
m1 <- lmer(
  senti_scale ~ 1 + (1 | screen_name),
  data = d
)

sjPlot::tab_model(m1)

	senti scale
Predictors	Estimates	CI	p
(Intercept)	5.72	5.71 – 5.73	<0.001
Random Effects
σ²	1.15
τ₀₀ _{screen_name}	0.38
ICC	0.25
N _{screen_name}	76235
Observations	536718
Marginal R² / Conditional R²	0.000 / 0.248

# ICC reported separately for each grouping factor in m1.
performance::icc(model = m1, by_group = TRUE)

## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.248

Both state and screen name

# Intercept-only model with random intercepts for both state and screen_name.
m2 <- lmer(
  senti_scale ~ 1 + (1 | state) + (1 | screen_name),
  data = d
)

sjPlot::tab_model(m2)

	senti scale
Predictors	Estimates	CI	p
(Intercept)	5.69	5.66 – 5.72	<0.001
Random Effects
σ²	1.18
τ₀₀ _{screen_name}	0.29
τ₀₀ _state	0.01
ICC	0.20
N _state	53
N _{screen_name}	39520
Observations	338057
Marginal R² / Conditional R²	0.000 / 0.204

# ICC reported separately for each grouping factor in m2.
performance::icc(model = m2, by_group = TRUE)

## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.197
## state       | 0.007

Adding whether the tweets included ngsschat or not

# Adds type_of_tweet as a fixed effect on top of the state and screen_name
# random intercepts.
m4 <- lmer(
  senti_scale ~ 1 + type_of_tweet + (1 | state) + (1 | screen_name),
  data = d
)

sjPlot::tab_model(m4, show.icc = TRUE)

	senti scale
Predictors	Estimates	CI	p
(Intercept)	5.65	5.62 – 5.68	<0.001
type_of_tweet [ngsschat-non-chat]	0.08	0.07 – 0.09	<0.001
type_of_tweet [non-ngsschat]	0.04	0.02 – 0.05	<0.001
Random Effects
σ²	1.18
τ₀₀ _{screen_name}	0.29
τ₀₀ _state	0.01
ICC	0.20
N _state	53
N _{screen_name}	39520
Observations	338057
Marginal R² / Conditional R²	0.000 / 0.204

# ICC reported separately for each grouping factor in m4.
performance::icc(model = m4, by_group = TRUE)

## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.197
## state       | 0.007

Adding year as a factor

# Adds year_fct as a fixed effect alongside type_of_tweet, so each year level
# gets its own coefficient.
m5 <- lmer(
  senti_scale ~ 1 + type_of_tweet + year_fct +
    (1 | state) + (1 | screen_name),
  data = d
)

sjPlot::tab_model(m5, show.icc = TRUE)

	senti scale
Predictors	Estimates	CI	p
(Intercept)	5.55	5.52 – 5.58	<0.001
type_of_tweet [ngsschat-non-chat]	0.06	0.05 – 0.08	<0.001
type_of_tweet [non-ngsschat]	0.04	0.03 – 0.05	<0.001
year_fct [2009]	-0.19	-1.24 – 0.85	0.720
year_fct [2010]	-0.65	-1.24 – -0.07	0.028
year_fct [2011]	-0.26	-0.46 – -0.05	0.016
year_fct [2012]	-0.24	-0.29 – -0.19	<0.001
year_fct [2013]	-0.17	-0.19 – -0.15	<0.001
year_fct [2014]	-0.13	-0.15 – -0.11	<0.001
year_fct [2015]	-0.04	-0.05 – -0.02	<0.001
year_fct [2017]	0.07	0.06 – 0.09	<0.001
year_fct [2018]	0.26	0.25 – 0.27	<0.001
year_fct [2019]	0.29	0.28 – 0.31	<0.001
year_fct [2020]	0.33	0.31 – 0.35	<0.001
Random Effects
σ²	1.18
τ₀₀ _{screen_name}	0.25
τ₀₀ _state	0.01
ICC	0.18
N _state	53
N _{screen_name}	39520
Observations	338057
Marginal R² / Conditional R²	0.019 / 0.195

# ICC reported separately for each grouping factor in m5.
performance::icc(model = m5, by_group = TRUE)

## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.174
## state       | 0.006

Adding year (scaled) as a slope

# Year enters as a single continuous slope; scale(..., scale = FALSE) only
# mean-centers it and leaves the unit (one year) unchanged.
m6 <- lmer(
  senti_scale ~ 1 + type_of_tweet + scale(year, scale = FALSE) +
    (1 | state) + (1 | screen_name),
  data = d
)

sjPlot::tab_model(m6, show.icc = TRUE)

	senti scale
Predictors	Estimates	CI	p
(Intercept)	5.65	5.62 – 5.68	<0.001
type_of_tweet [ngsschat-non-chat]	0.06	0.04 – 0.07	<0.001
type_of_tweet [non-ngsschat]	0.04	0.02 – 0.05	<0.001
scale(year, scale = FALSE)	0.08	0.08 – 0.08	<0.001
Random Effects
σ²	1.16
τ₀₀ _{screen_name}	0.25
τ₀₀ _state	0.01
ICC	0.18
N _state	52
N _{screen_name}	40479
Observations	344994
Marginal R² / Conditional R²	0.018 / 0.193

# ICC reported separately for each grouping factor in m6.
performance::icc(model = m6, by_group = TRUE)

## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.174
## state       | 0.005

Adding year as a random effect

# Year is treated as a third grouping factor with its own random intercept,
# next to state and screen_name.
m7 <- lmer(
  senti_scale ~ 1 + type_of_tweet +
    (1 | year) + (1 | state) + (1 | screen_name),
  data = d
)

sjPlot::tab_model(m7, show.icc = TRUE)

	senti scale
Predictors	Estimates	CI	p
(Intercept)	5.54	5.40 – 5.68	<0.001
type_of_tweet [ngsschat-non-chat]	0.06	0.05 – 0.08	<0.001
type_of_tweet [non-ngsschat]	0.04	0.03 – 0.05	<0.001
Random Effects
σ²	1.18
τ₀₀ _{screen_name}	0.25
τ₀₀ _state	0.01
τ₀₀ _year	0.05
ICC	0.21
N _year	12
N _state	53
N _{screen_name}	39520
Observations	338057
Marginal R² / Conditional R²	0.000 / 0.207

# ICC reported separately for each grouping factor in m7.
performance::icc(model = m7, by_group = TRUE)

## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.168
## state       | 0.005
## year        | 0.034

Adding adoption status

# Adds adoption_key, lead, modified and time_on_twitter as fixed effects.
# The `-1` removes the intercept, so each type_of_tweet level gets its own
# estimate instead of a contrast against a reference level.
# NOTE(review): this is the only model fitted without an intercept, and its
# tab_model() table shows ICC 0.00 with identical marginal/conditional R2
# while performance::icc() reports 0.210 for screen_name -- confirm that
# dropping the intercept is intended.
m8 <- lmer(
  senti_scale ~ -1 + type_of_tweet + year_centered + adoption_key + lead +
    modified + time_on_twitter + (1 | state) + (1 | screen_name),
  data = d
)

# Coefficient plot of the fitted model.
sjPlot::plot_model(m8) +
  hrbrthemes::theme_ipsum()

# ICC reported separately for each grouping factor in m8.
performance::icc(model = m8, by_group = TRUE)

## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.210
## state       | 0.006

# Summary table for m8 including standard errors; the `file` argument also
# saves the table to out1.doc.
sjPlot::tab_model(
  m8,
  show.icc = TRUE,
  show.se = TRUE,
  file = 'out1.doc'
)

	senti scale
Predictors	Estimates	std. Error	CI	p
type_of_tweet [ngsschat-chat]	5.69	0.03	5.63 – 5.75	<0.001
type_of_tweet [ngsschat-non-chat]	5.74	0.03	5.68 – 5.81	<0.001
type_of_tweet [non-ngsschat]	5.73	0.03	5.66 – 5.79	<0.001
year_centered	0.09	0.00	0.09 – 0.09	<0.001
adoption_key [before_adoption]	0.06	0.03	-0.00 – 0.12	0.069
adoption_key [near_adoption]	0.01	0.03	-0.05 – 0.07	0.764
adoption_key [no_adoption]	-0.06	0.05	-0.16 – 0.03	0.181
lead	-0.00	0.03	-0.06 – 0.05	0.902
modified	0.03	0.03	-0.02 – 0.09	0.228
time_on_twitter	-0.02	0.00	-0.02 – -0.01	<0.001
Random Effects
σ²	1.16
τ₀₀ _{screen_name}	0.24
τ₀₀ _state	0.01
ICC	0.00
N _state	51
N _{screen_name}	40387
Observations	344873
Marginal R² / Conditional R²	0.026 / 0.026

Adding other state-level variables

# Mean-centered year slope plus the lead and modified predictors and
# type_of_tweet, with random intercepts for state and screen_name.
m10 <- lmer(
  senti_scale ~ 1 + scale(year, scale = FALSE) + lead + modified +
    type_of_tweet + (1 | state) + (1 | screen_name),
  data = d
)

sjPlot::tab_model(m10, show.icc = TRUE)

	senti scale
Predictors	Estimates	CI	p
(Intercept)	5.63	5.58 – 5.68	<0.001
scale(year, scale = FALSE)	0.08	0.08 – 0.08	<0.001
lead	0.01	-0.05 – 0.06	0.803
modified	0.04	-0.02 – 0.10	0.192
type_of_tweet [ngsschat-non-chat]	0.06	0.04 – 0.07	<0.001
type_of_tweet [non-ngsschat]	0.04	0.02 – 0.05	<0.001
Random Effects
σ²	1.16
τ₀₀ _{screen_name}	0.25
τ₀₀ _state	0.01
ICC	0.18
N _state	51
N _{screen_name}	40387
Observations	344873
Marginal R² / Conditional R²	0.018 / 0.194

# ICC reported separately for each grouping factor in m10.
performance::icc(model = m10, by_group = TRUE)

## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.174
## state       | 0.005

ignoring state because of missing data

# Same fixed effects as the year-slope model but without the state random
# intercept, so rows with a missing state are no longer dropped.
m11 <- lmer(
  senti_scale ~ 1 + scale(year, scale = FALSE) + type_of_tweet +
    (1 | screen_name),
  data = d
)

sjPlot::tab_model(m11, show.icc = TRUE)

	senti scale
Predictors	Estimates	CI	p
(Intercept)	5.65	5.64 – 5.66	<0.001
scale(year, scale = FALSE)	0.08	0.08 – 0.08	<0.001
type_of_tweet [ngsschat-non-chat]	0.07	0.06 – 0.08	<0.001
type_of_tweet [non-ngsschat]	0.04	0.03 – 0.06	<0.001
Random Effects
σ²	1.10
τ₀₀ _{screen_name}	0.29
ICC	0.21
N _{screen_name}	87818
Observations	571378
Marginal R² / Conditional R²	0.018 / 0.221

# ICC reported separately for each grouping factor in m11.
performance::icc(model = m11, by_group = TRUE)

## # ICC by Group
## 
## Group       |   ICC
## -------------------
## screen_name | 0.206

ignoring screen name to understand potential bias from not doing so

# Ordinary least-squares comparison model: same fixed effects as the
# screen_name-only mixed model, but with no random effects at all.
m12 <- lm(
  senti_scale ~ 1 + scale(year, scale = FALSE) + type_of_tweet,
  data = d
)

# show.icc is omitted: an lm fit has no random effects, so there is no ICC
# to display and the argument had no effect on this table.
sjPlot::tab_model(m12)

	senti scale
Predictors	Estimates	CI	p
(Intercept)	5.78	5.77 – 5.79	<0.001
scale(year, scale = FALSE)	0.07	0.07 – 0.07	<0.001
type_of_tweet [ngsschat-non-chat]	-0.06	-0.07 – -0.05	<0.001
type_of_tweet [non-ngsschat]	-0.08	-0.09 – -0.07	<0.001
Observations	571378
R² / R² adjusted	0.015 / 0.015

#NGSSchat sentiment modeling

2020-08-27

loading, setting up

Descriptive analysis

overall

by year

by tweet type

by state

by adoption status

by adoption status by year

Models

Just modeling the effect of state

Just modeling the effect of screen_name

Both state and screen name

Adding whether the tweets included ngsschat or not

Adding year as a factor

Adding year (scaled) as a slope

Adding year as a random effect

Adding adoption status

Adding other state-level variables

ignoring state because of missing data

ignoring screen name to understand potential bias from not doing so