Loading, setting up

library(googlesheets4)
library(tidyverse)

# District-level spreadsheet, read from Google Sheets with the column types
# given explicitly; column names are then cleaned with janitor.
s <- janitor::clean_names(
  read_sheet(
    "https://docs.google.com/spreadsheets/d/1lGTr8Rq3Po_UZwkO6ycdByopCEcBloyOCbpdi02OWrs/edit#gid=0",
    skip = 0,
    col_types = "ccdccdciiidddcilddddddddccccc"
  )
)

# Tweets from the local CSV file, with column names cleaned the same way.
tweets <- janitor::clean_names(read_csv("tweets.csv"))

# Keep the identifiers (renaming `school` to `page_name`) plus the covariates
# from `proportion_of_black_students_percent` through
# `se_of_white_hispanic_gap_in_math`, and tag this copy as year 2020.
ss <- mutate(
  select(
    s,
    nces_id,
    page_name = school,
    proportion_of_black_students_percent:se_of_white_hispanic_gap_in_math
  ),
  year = 2020
)

# The same rows, relabelled as year 2021.
sss <- mutate(ss, year = 2021)

# Stack both copies so every row of `ss` appears once per year.
ssss <- bind_rows(ss, sss)
# Keep the page name, timestamp, theme codes, follower count and the
# `likes:care` columns for the first 609 rows, then coerce the nine theme
# codes to double and treat missing codes as 0.
# NOTE(review): 609 is hard-coded; presumably the number of coded rows in
# tweets.csv -- confirm.
tweets <- tweets %>%
  select(
    page_name, post_created, police_brutality:equity_inequity,
    followers_at_posting, likes:care
  ) %>%
  slice(1:609) %>%
  mutate(
    # across() replaces the two superseded mutate_at() passes with one.
    across(
      all_of(c(
        "police_brutality", "racial_injustice_inequality", "george_floyd",
        "justice_injustice", "racism_race", "protests",
        "steps_taken_to_resolve_issues", "off_topic_unclear",
        "equity_inequity"
      )),
      ~ replace_na(as.double(.x), 0)
    )
  )

# Count coded posts per page and calendar year: parse the posting timestamp,
# take its year, and sum each of the nine theme codes within page and year.
# The result has one `<code>_sum` column per theme code.
# `tweets` is already limited to its first 609 rows where it is cleaned
# above, so no further slicing is needed here.
tweets_summarized <- tweets %>%
  mutate(year = lubridate::year(lubridate::ymd_hms(post_created))) %>%
  select(
    page_name, year, police_brutality:equity_inequity, followers_at_posting
  ) %>%
  group_by(page_name, year) %>%
  summarize(
    across(
      all_of(c(
        "police_brutality", "racial_injustice_inequality", "george_floyd",
        "justice_injustice", "racism_race", "protests",
        "steps_taken_to_resolve_issues", "off_topic_unclear",
        "equity_inequity"
      )),
      sum,
      .names = "{.col}_sum"
    )
  ) %>%
  ungroup()

# Analysis table: one row per page and year from `ssss`, with the yearly theme
# counts attached. Page-years without any summarized tweets get a count of 0
# for every theme.
s_tj <- ssss %>%
  # Join keys stated explicitly instead of being guessed from shared names.
  left_join(tweets_summarized, by = c("page_name", "year")) %>%
  filter(!is.na(nces_id)) %>%
  mutate(
    across(
      all_of(c(
        "police_brutality_sum", "racial_injustice_inequality_sum",
        "george_floyd_sum", "justice_injustice_sum", "racism_race_sum",
        "protests_sum", "steps_taken_to_resolve_issues_sum",
        "off_topic_unclear_sum", "equity_inequity_sum"
      )),
      ~ replace_na(.x, 0)
    )
  ) %>%
  select(nces_id, page_name, year, everything())

Exploratory Data Analysis

Summary statistics for all variables

# Print per-column summary statistics for the joined analysis table `s_tj`.
skimr::skim(s_tj)
Data summary
Name s_tj
Number of rows 200
Number of columns 29
_______________________
Column type frequency:
character 2
logical 1
numeric 26
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
page_name 0 1.00 8 46 0 100 0
date_closed_for_the_19_20_school_year 8 0.96 10 10 0 9 0

Variable type: logical

skim_variable n_missing complete_rate mean count
experience_of_police_brutality_within_x_miles 200 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
nces_id 0 1.00 2980103.99 1731462.40 102370.00 1201552.50 3187440.00 4819662.50 5509600.00 ▃▆▂▂▇
year 0 1.00 2020.50 0.50 2020.00 2020.00 2020.50 2021.00 2021.00 ▇▁▁▁▇
proportion_of_black_students_percent 8 0.96 18.15 14.60 1.00 7.00 15.00 26.00 68.00 ▇▅▂▁▁
proportion_of_other_students_of_color_percent 8 0.96 33.46 18.51 8.00 18.00 29.50 47.00 81.00 ▇▅▃▂▂
median_household_income 10 0.95 69685.88 23156.87 7115.00 53412.25 63602.00 82696.50 142299.00 ▁▇▆▂▁
proportion_of_in_school_suspensions_for_black_students_percent 6 0.97 37.62 23.16 2.20 19.30 35.90 53.70 87.70 ▇▇▆▅▂
proportion_of_out_of_school_suspensions_for_black_students_percent 8 0.96 42.97 24.92 1.70 25.08 38.05 62.88 94.80 ▆▇▆▅▃
proportion_of_expulsions_for_black_students_percent 26 0.87 46.33 29.15 0.00 22.70 41.30 74.30 100.00 ▇▇▆▅▅
proximity_to_minneapolis_miles 0 1.00 1180.47 421.07 291.00 947.00 1124.00 1320.50 3933.00 ▅▇▁▁▁
white_black_gap_in_ela 6 0.97 0.75 0.24 0.15 0.61 0.74 0.88 1.49 ▂▇▇▃▁
se_of_white_black_gap_in_ela 6 0.97 0.02 0.02 0.01 0.01 0.01 0.02 0.18 ▇▁▁▁▁
white_hispanic_gap_in_ela 6 0.97 0.65 0.24 0.17 0.50 0.63 0.76 1.33 ▃▇▆▃▁
se_of_white_hispanic_gap_in_ela 6 0.97 0.02 0.01 0.01 0.01 0.01 0.02 0.07 ▇▂▁▁▁
white_black_gap_in_math 6 0.97 0.79 0.23 0.15 0.65 0.78 0.90 1.42 ▁▅▇▃▁
se_of_white_black_gap_in_math 6 0.97 0.02 0.01 0.01 0.01 0.02 0.02 0.07 ▇▃▁▁▁
white_hispanic_gap_in_math 6 0.97 0.58 0.23 0.01 0.43 0.56 0.71 1.24 ▂▆▇▃▁
se_of_white_hispanic_gap_in_math 6 0.97 0.02 0.01 0.01 0.01 0.01 0.02 0.04 ▇▇▅▂▁
police_brutality_sum 0 1.00 0.00 0.07 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
racial_injustice_inequality_sum 0 1.00 0.13 0.42 0.00 0.00 0.00 0.00 2.00 ▇▁▁▁▁
george_floyd_sum 0 1.00 0.30 0.67 0.00 0.00 0.00 0.00 5.00 ▇▁▁▁▁
justice_injustice_sum 0 1.00 0.30 0.69 0.00 0.00 0.00 0.00 4.00 ▇▂▁▁▁
racism_race_sum 0 1.00 0.84 1.80 0.00 0.00 0.00 1.00 11.00 ▇▁▁▁▁
protests_sum 0 1.00 0.12 0.48 0.00 0.00 0.00 0.00 5.00 ▇▁▁▁▁
steps_taken_to_resolve_issues_sum 0 1.00 0.84 1.79 0.00 0.00 0.00 1.00 16.00 ▇▁▁▁▁
off_topic_unclear_sum 0 1.00 1.15 1.94 0.00 0.00 1.00 1.00 17.00 ▇▁▁▁▁
equity_inequity_sum 0 1.00 0.86 2.06 0.00 0.00 0.00 1.00 18.00 ▇▁▁▁▁

Data visualizations of the coded themes

# Horizontal bar chart of the total number of posts per theme over all rows
# of `s_tj`, excluding the off-topic/unclear code, ordered by total.
s_tj %>%
  select(police_brutality_sum:equity_inequity_sum) %>%
  # pivot_longer() replaces the superseded gather().
  pivot_longer(everything(), names_to = "key", values_to = "val") %>%
  filter(key != "off_topic_unclear_sum") %>%
  group_by(key) %>%
  summarize(sum_val = sum(val)) %>%
  ggplot(aes(x = reorder(key, sum_val), y = sum_val)) +
  geom_col() +
  coord_flip() +
  ylab("Number of posts (overall)") +
  xlab("Theme") +
  theme_light()

# Horizontal bar chart of the share of pages that posted at least once about
# each theme (summed over both years), excluding the off-topic/unclear code.
s_tj %>%
  select(page_name, police_brutality_sum:equity_inequity_sum) %>%
  pivot_longer(-page_name, names_to = "key", values_to = "val") %>%
  filter(key != "off_topic_unclear_sum") %>%
  group_by(page_name, key) %>%
  summarize(sum_val = sum(val), .groups = "drop") %>%
  # Keep only page/theme pairs with at least one post ...
  filter(sum_val != 0) %>%
  # ... so counting rows per theme counts the pages that posted on it.
  count(key, name = "sum_n") %>%
  # Divide by the number of pages in the data rather than a hard-coded 100.
  mutate(sum_n = sum_n / n_distinct(s_tj$page_name)) %>%
  ggplot(aes(x = reorder(key, sum_n), y = sum_n)) +
  geom_col() +
  coord_flip() +
  ylab("Proportion of districts posting at least once") +
  xlab("Theme") +
  theme_light()

Poisson regression models

# Poisson regression of the yearly count of "steps taken to resolve issues"
# posts on year, distance to Minneapolis (miles / 100), median household
# income (/ 1000), the out-of-school suspension share for Black students
# (percent / 10) and the white-Black gap in math.
m1 <- glm(
  formula = steps_taken_to_resolve_issues_sum ~
    1 +
    as.factor(year) +
    I(proximity_to_minneapolis_miles / 100) +
    I(median_household_income / 1000) +
    I(proportion_of_out_of_school_suspensions_for_black_students_percent / 10) +
    white_black_gap_in_math,
  family = "poisson",
  data = s_tj
)

# Coefficient table for the fitted model.
sjPlot::tab_model(m1)
  steps taken to resolve
issues sum
Predictors Incidence Rate Ratios CI p
(Intercept) 0.60 0.21 – 1.65 0.325
year [2021] 0.44 0.31 – 0.61 <0.001
proximity to minneapolis
miles/100
0.96 0.91 – 1.01 0.108
median household
income/1000
1.01 1.00 – 1.02 0.003
proportion of out of
school suspensions for
black students percent/10
1.24 1.15 – 1.33 <0.001
white black gap in math 0.48 0.25 – 0.91 0.025
Observations 182
R2 Nagelkerke 0.365
# Poisson regression of the yearly count of racism/race posts on year,
# distance to Minneapolis (miles / 100), median household income (/ 1000),
# the out-of-school suspension share for Black students (percent / 10) and
# the white-Black gap in math.
m2 <- glm(
  formula = racism_race_sum ~
    1 +
    as.factor(year) +
    I(proximity_to_minneapolis_miles / 100) +
    I(median_household_income / 1000) +
    I(proportion_of_out_of_school_suspensions_for_black_students_percent / 10) +
    white_black_gap_in_math,
  family = "poisson",
  data = s_tj
)

# Coefficient table for the fitted model.
sjPlot::tab_model(m2)
  racism race sum
Predictors Incidence Rate Ratios CI p
(Intercept) 0.56 0.19 – 1.66 0.297
year [2021] 0.33 0.22 – 0.48 <0.001
proximity to minneapolis
miles/100
0.99 0.94 – 1.03 0.664
median household
income/1000
1.01 1.00 – 1.02 0.001
proportion of out of
school suspensions for
black students percent/10
1.16 1.07 – 1.26 <0.001
white black gap in math 0.40 0.20 – 0.84 0.014
Observations 182
R2 Nagelkerke 0.320
# Poisson regression of the yearly count of equity/inequity posts on year,
# distance to Minneapolis (miles / 100), median household income (/ 1000),
# the out-of-school suspension share for Black students (percent / 10) and
# the white-Black gap in math.
m3 <- glm(
  formula = equity_inequity_sum ~
    1 +
    as.factor(year) +
    I(proximity_to_minneapolis_miles / 100) +
    I(median_household_income / 1000) +
    I(proportion_of_out_of_school_suspensions_for_black_students_percent / 10) +
    white_black_gap_in_math,
  family = "poisson",
  data = s_tj
)

# Coefficient table for the fitted model.
sjPlot::tab_model(m3)
  equity inequity sum
Predictors Incidence Rate Ratios CI p
(Intercept) 1.67 0.59 – 4.69 0.333
year [2021] 0.64 0.46 – 0.90 0.010
proximity to minneapolis
miles/100
0.94 0.90 – 0.99 0.029
median household
income/1000
1.00 0.99 – 1.00 0.398
proportion of out of
school suspensions for
black students percent/10
1.05 0.97 – 1.13 0.228
white black gap in math 1.15 0.55 – 2.43 0.714
Observations 182
R2 Nagelkerke 0.107
# Poisson regression of the yearly count of George Floyd posts on year,
# distance to Minneapolis (miles / 100), median household income (/ 1000),
# the out-of-school suspension share for Black students (percent / 10) and
# the white-Black gap in math.
m4 <- glm(
  formula = george_floyd_sum ~
    1 +
    as.factor(year) +
    I(proximity_to_minneapolis_miles / 100) +
    I(median_household_income / 1000) +
    I(proportion_of_out_of_school_suspensions_for_black_students_percent / 10) +
    white_black_gap_in_math,
  family = "poisson",
  data = s_tj
)

# Coefficient table for the fitted model.
sjPlot::tab_model(m4)
  george floyd sum
Predictors Incidence Rate Ratios CI p
(Intercept) 1.04 0.21 – 5.36 0.958
year [2021] 0.15 0.06 – 0.31 <0.001
proximity to minneapolis
miles/100
1.01 0.94 – 1.07 0.813
median household
income/1000
0.99 0.98 – 1.00 0.152
proportion of out of
school suspensions for
black students percent/10
1.02 0.90 – 1.16 0.716
white black gap in math 0.72 0.22 – 2.48 0.601
Observations 182
R2 Nagelkerke 0.290
# Poisson regression of the yearly count of justice/injustice posts on year,
# distance to Minneapolis (miles / 100), median household income (/ 1000),
# the out-of-school suspension share for Black students (percent / 10) and
# the white-Black gap in math.
m5 <- glm(
  formula = justice_injustice_sum ~
    1 +
    as.factor(year) +
    I(proximity_to_minneapolis_miles / 100) +
    I(median_household_income / 1000) +
    I(proportion_of_out_of_school_suspensions_for_black_students_percent / 10) +
    white_black_gap_in_math,
  family = "poisson",
  data = s_tj
)

# Coefficient table for the fitted model.
sjPlot::tab_model(m5)
  justice injustice sum
Predictors Incidence Rate Ratios CI p
(Intercept) 0.06 0.01 – 0.31 0.001
year [2021] 0.59 0.33 – 1.01 0.060
proximity to minneapolis
miles/100
1.05 0.97 – 1.11 0.200
median household
income/1000
1.00 0.98 – 1.01 0.829
proportion of out of
school suspensions for
black students percent/10
1.22 1.07 – 1.39 0.003
white black gap in math 1.72 0.58 – 5.40 0.338
Observations 182
R2 Nagelkerke 0.154
# Poisson regression of the yearly count of racial injustice/inequality posts
# on year, distance to Minneapolis (miles / 100), median household income
# (/ 1000), the out-of-school suspension share for Black students
# (percent / 10) and the white-Black gap in math.
m6 <- glm(
  formula = racial_injustice_inequality_sum ~
    1 +
    as.factor(year) +
    I(proximity_to_minneapolis_miles / 100) +
    I(median_household_income / 1000) +
    I(proportion_of_out_of_school_suspensions_for_black_students_percent / 10) +
    white_black_gap_in_math,
  family = "poisson",
  data = s_tj
)

# Coefficient table for the fitted model.
sjPlot::tab_model(m6)
  racial injustice
inequality sum
Predictors Incidence Rate Ratios CI p
(Intercept) 2.09 0.16 – 28.75 0.577
year [2021] 0.21 0.06 – 0.56 0.005
proximity to minneapolis
miles/100
0.88 0.77 – 0.99 0.040
median household
income/1000
0.99 0.97 – 1.01 0.309
proportion of out of
school suspensions for
black students percent/10
1.03 0.85 – 1.25 0.763
white black gap in math 0.72 0.12 – 4.65 0.722
Observations 182
R2 Nagelkerke 0.208
# Poisson regression of the yearly count of protest posts on year, distance
# to Minneapolis (miles / 100), median household income (/ 1000), the
# out-of-school suspension share for Black students (percent / 10) and the
# white-Black gap in math.
m7 <- glm(
  formula = protests_sum ~
    1 +
    as.factor(year) +
    I(proximity_to_minneapolis_miles / 100) +
    I(median_household_income / 1000) +
    I(proportion_of_out_of_school_suspensions_for_black_students_percent / 10) +
    white_black_gap_in_math,
  family = "poisson",
  data = s_tj
)

# Coefficient table for the fitted model.
sjPlot::tab_model(m7)
  protests sum
Predictors Incidence Rate Ratios CI p
(Intercept) 0.39 0.03 – 5.44 0.485
year [2021] 0.05 0.00 – 0.23 0.003
proximity to minneapolis
miles/100
0.97 0.85 – 1.09 0.685
median household
income/1000
0.99 0.97 – 1.01 0.315
proportion of out of
school suspensions for
black students percent/10
1.17 0.96 – 1.43 0.114
white black gap in math 0.75 0.14 – 4.45 0.748
Observations 182
R2 Nagelkerke 0.311
# Poisson regression of the yearly count of police brutality posts on year,
# distance to Minneapolis (miles / 100), median household income (/ 1000),
# the out-of-school suspension share for Black students (percent / 10) and
# the white-Black gap in math.
# NOTE(review): the rendered table for this model reports an infinite upper
# confidence bound for year, and the skim summary shows this outcome is
# almost always 0 -- the estimates look unstable; confirm before
# interpreting them.
m8 <- glm(
  formula = police_brutality_sum ~
    1 +
    as.factor(year) +
    I(proximity_to_minneapolis_miles / 100) +
    I(median_household_income / 1000) +
    I(proportion_of_out_of_school_suspensions_for_black_students_percent / 10) +
    white_black_gap_in_math,
  family = "poisson",
  data = s_tj
)

# Coefficient table for the fitted model.
sjPlot::tab_model(m8)
  police brutality sum
Predictors Incidence Rate Ratios CI p
(Intercept) 0.03 0.00 – 41851.59 0.625
year [2021] 0.00 0.00 – Inf 0.998
proximity to minneapolis
miles/100
0.96 0.50 – 1.84 0.897
median household
income/1000
0.94 0.82 – 1.07 0.347
proportion of out of
school suspensions for
black students percent/10
1.84 0.48 – 7.04 0.372
white black gap in math 0.38 0.00 – 264.45 0.772
Observations 182
R2 Nagelkerke 0.421

Alternate model specifications

# Alternate specification of the "steps taken to resolve issues" model: uses
# the in-school suspension share for Black students (unscaled percent) and
# the white-Black gap in ELA in place of the out-of-school share and the
# math gap.
m1_i <- glm(
  formula = steps_taken_to_resolve_issues_sum ~
    1 +
    as.factor(year) +
    I(proximity_to_minneapolis_miles / 100) +
    I(median_household_income / 1000) +
    proportion_of_in_school_suspensions_for_black_students_percent +
    white_black_gap_in_ela,
  family = "poisson",
  data = s_tj
)

# Coefficient table for the fitted model.
sjPlot::tab_model(m1_i)
  steps taken to resolve
issues sum
Predictors Incidence Rate Ratios CI p
(Intercept) 0.58 0.21 – 1.56 0.284
year [2021] 0.44 0.31 – 0.61 <0.001
proximity to minneapolis
miles/100
0.95 0.91 – 1.00 0.082
median household
income/1000
1.01 1.00 – 1.02 0.006
proportion of in school
suspensions for black
students percent
1.02 1.01 – 1.03 <0.001
white black gap in ela 0.63 0.34 – 1.19 0.153
Observations 184
R2 Nagelkerke 0.344

Followers per page

# Attach the covariates in `ss` (the copy tagged year 2020) to every tweet,
# matching on page name.
joined <- left_join(tweets, ss, by = "page_name")

# Follower statistics across pages: keep the first row per page, then report
# the sum, mean and standard deviation of its follower count (as integers,
# ignoring missing values).
joined %>%
  distinct(page_name, .keep_all = TRUE) %>%
  mutate(followers_at_posting = as.integer(followers_at_posting)) %>%
  summarize(
    across(
      followers_at_posting,
      list(
        sum = ~ sum(.x, na.rm = TRUE),
        mean = ~ mean(.x, na.rm = TRUE),
        sd = ~ sd(.x, na.rm = TRUE)
      ),
      .names = "{.fn}_{.col}"
    )
  )
## # A tibble: 1 × 3
##   sum_followers_at_posting mean_followers_at_posting sd_followers_at_posting
##                      <int>                     <dbl>                   <dbl>
## 1                  3151015                    35405.                  24647.