Causal Impact
Number of total users:
neda_liwc %>%
select(id) %>%
distinct() %>%
nrow()
## [1] 431
|
Word Category
|
Relative Eff.(%)
|
P Value
|
|
Female
|
33.031
|
0.001
|
|
Risk
|
20.29
|
0.001
|
|
Relig
|
17.409
|
0.015
|
|
Anx
|
15.936
|
0.046
|
|
We
|
15.857
|
0.004
|
|
Money
|
10.6
|
0.002
|
|
Hear
|
9.708
|
0.002
|
|
Body
|
8.89
|
0.003
|
|
Nonflu
|
7.344
|
0.019
|
|
Power
|
6.675
|
0.001
|
|
Focusfuture
|
6.63
|
0.006
|
|
Relativ
|
6.452
|
0.001
|
|
Tentat
|
4.874
|
0.012
|
|
Focuspresent
|
4.403
|
0.001
|
|
Negemo
|
4.233
|
0.004
|
|
Work
|
3.925
|
0.022
|
|
Social
|
3.632
|
0.001
|
|
Informal
|
3.155
|
0.002
|
|
Focuspast
|
2.694
|
0.040
|
|
Posemo
|
-2.59
|
0.007
|
|
Negate
|
-3.469
|
0.048
|
|
See
|
-15.408
|
0.001
|
|
Ingest
|
-26.529
|
0.025
|
|
Friend
|
-27.165
|
0.001
|
|
Filler
|
-42.13
|
0.001
|
CAUSAL IMPACT BY GENDER
Number of users by gender (400~):
neda_liwc_gender %>%
select(id, gender) %>%
distinct() %>%
count(gender)
## # A tibble: 3 x 2
## gender n
## <chr> <int>
## 1 f 173
## 2 m 65
## 3 u 193
Number of users in the baseline ():
baseline_liwc_gender %>%
select(id, gender) %>%
distinct() %>%
count(gender)
## # A tibble: 3 x 2
## gender n
## <chr> <int>
## 1 f 1699
## 2 m 2066
## 3 u 2978
baseline_liwc_gender %>%
select(-text, -created_at_tweet, -id_tweet, -name) %>%
group_by(gender, id, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
select(-id) %>%
group_by(gender, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
pivot_longer(cols = fun:filler, names_to = "categ", values_to = "values_baseline") -> ci_baseline
neda_liwc_gender %>%
select(-id_tweet, -created_at_tweet) %>%
group_by(gender, id, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
select(-id) %>%
group_by(gender, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
pivot_longer(fun:filler, names_to = "categ",
values_to = "values_neda") -> ci_neda_liwc
pre_period <- c(1, 16)
post_period <- c(17, 31)
ci_baseline %>%
full_join(ci_neda_liwc) -> d_second
d_second %>%
# count(gender, days_tweet) %>%
# view()
# view()
# select(categ, values_neda, values_baseline) %>%
# nest(data = - categ) %>%
select(gender, categ, values_neda, values_baseline) %>%
# filter(gender == "m") %>% view()
group_by(categ, gender) %>%
nest() %>%
ungroup() %>%
select(gender, categ, data) %>%
mutate(mod = map(data, ~CausalImpact::CausalImpact(.,
pre_period,
post_period))) -> ci
ci %>%
mutate(summary_mod = map(mod, "summary")) %>%
filter(!map_lgl(summary_mod, is.null)) -> ci_resul
ci_resul %>%
mutate(p = map(summary_mod, "p")) %>%
mutate(p = map_dbl(p, 1)) %>%
filter(categ %in% categories_in_gender) %>%
mutate(relative_effect = map(summary_mod, "RelEffect")) %>%
mutate(relative_effect = map_dbl(relative_effect, 2))-> sig_cat
# sig_cat %>%
# filter(categ %in% sel_cat) -> sig_cat
sig_cat %>%
arrange(gender, desc(relative_effect)) %>%
select(gender, categ,
second_relative_effect = relative_effect, p_second = p) -> second_ap
second_ap %>%
pivot_wider(id_cols = categ,
values_from = c(second_relative_effect, p_second),
names_from = gender) %>%
mutate(categ = str_to_title(categ)) %>%
mutate_at(vars(starts_with("second")), ~.*100) %>%
mutate_if(is.numeric, ~round(., digits = 3)) %>%
mutate_at(vars(starts_with("second")), function(x){
cell_spec(x, "html", color = spec_color(x), bold = T)
}) %>%
kable("html", escape = F,
align = "lrr",
col.names = c("Word Category", "FEMALE Relative Eff.(%)",
"MALE Relative Eff.(%)",
"UNKNOWN Relative Eff.(%)",
"FEMALE P Value",
"MALE P Value",
"UNKNOWN P Value")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE)
|
Word Category
|
FEMALE Relative Eff.(%)
|
MALE Relative Eff.(%)
|
UNKNOWN Relative Eff.(%)
|
FEMALE P Value
|
MALE P Value
|
UNKNOWN P Value
|
|
Risk
|
27.713
|
7.938
|
10.948
|
0.001
|
0.174
|
0.074
|
|
Female
|
26.164
|
22.59
|
34.262
|
0.001
|
0.005
|
0.001
|
|
Relig
|
25.409
|
-2.293
|
15.585
|
0.014
|
0.393
|
0.102
|
|
We
|
17.588
|
-0.364
|
18.167
|
0.028
|
0.471
|
0.002
|
|
Hear
|
10.457
|
-3.575
|
10.76
|
0.003
|
0.255
|
0.003
|
|
Power
|
9.825
|
0.762
|
5.091
|
0.001
|
0.417
|
0.012
|
|
Anx
|
8.477
|
6.309
|
29.838
|
0.174
|
0.270
|
0.031
|
|
Money
|
8.209
|
31.643
|
-0.583
|
0.029
|
0.001
|
0.464
|
|
Tentat
|
7.838
|
2.283
|
3.036
|
0.011
|
0.262
|
0.141
|
|
Relativ
|
7.719
|
1.424
|
2.714
|
0.001
|
0.256
|
0.070
|
|
Informal
|
6.653
|
4.143
|
0.35
|
0.001
|
0.001
|
0.391
|
|
Work
|
6.207
|
0.102
|
2.455
|
0.016
|
0.477
|
0.190
|
|
Focuspast
|
5.91
|
9.87
|
-4.13
|
0.010
|
0.007
|
0.024
|
|
Social
|
4.541
|
4.411
|
3.713
|
0.001
|
0.002
|
0.003
|
|
Focuspresent
|
3.932
|
10.171
|
3.492
|
0.007
|
0.001
|
0.001
|
|
Focusfuture
|
2.705
|
9.076
|
13.746
|
0.267
|
0.002
|
0.001
|
|
Nonflu
|
2.614
|
15.021
|
8.292
|
0.326
|
0.042
|
0.079
|
|
Negemo
|
1.98
|
13.916
|
2.728
|
0.253
|
0.001
|
0.156
|
|
Body
|
-0.127
|
31.309
|
15.016
|
0.494
|
0.001
|
0.001
|
|
Negate
|
-6.244
|
1.931
|
-0.057
|
0.080
|
0.346
|
0.483
|
|
Posemo
|
-7.802
|
-0.554
|
-0.266
|
0.001
|
0.434
|
0.423
|
|
Ingest
|
-17.991
|
-27.943
|
-12.164
|
0.188
|
0.012
|
0.207
|
|
See
|
-20.432
|
1.343
|
-17.405
|
0.001
|
0.419
|
0.001
|
|
Friend
|
-34.369
|
-16.925
|
-24.181
|
0.001
|
0.092
|
0.008
|
|
Filler
|
-40.55
|
-34.464
|
-52.18
|
0.002
|
0.007
|
0.001
|
Words inside categ:
The tables below contain the changes for the 4 most changed words (positive and negative) within each of the categories. The first column is the specific word. Changes_total is the difference between the “before” and “after” frequency of all users. Changes_f same as Changes_total but for female users, Changes_m for male users and Changes_u for unknown users.
library(tidyverse)
read_delim(here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
delim = "\t", skip = 1, col_names = c("number", "name"),
n_max = 73) -> categories_name
read_tsv(here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
skip = 75, col_names = paste0(c("x"), c("_"), 1:11),
guess_max = 6000, col_types = "ccccccccccc") %>%
mutate(x_1 = str_remove(x_1, "\\*")) -> words_dic
categories_name %>%
filter(name %in% c("female", "family", "anx", "shehe", "affiliation", "friend")) %>%
pull(number) -> number_top_categ
words_dic %>%
filter_at(vars(-x_1), any_vars(. %in% number_top_categ)) %>%
pull(x_1) -> words_cat
# neda_timelapse ----------------------------------------------------------
library(lubridate)
# neda_hist <- read_rds(here::here("data", "NEDA_historical_twenty.rds")) %>%
# distinct() %>%
# filter(created_at_tweet >= ymd("2019-03-01")) %>%
# mutate(text = str_to_lower(text),
# neda_related = str_detect(text,
# pattern = "#nedawareness|#comeasyouare|@nedastaff"))
# gender_output <- read_tsv(here::here("data", "gender_extractor",
# "neda_liwc_gender_output.tsv"),
# col_names = c("id", "name", "name_proc", "gender"))
gender_output <- read_rds(here::here("data", "neda_liwc_gender_twenty.rds")) %>%
select(gender, id_tweet, id)
# first_tweet <- neda_hist %>%
# select(created_at_tweet, text, id, id_tweet, neda_related) %>%
# filter(neda_related) %>%
# arrange(created_at_tweet) %>%
# group_by(id) %>%
# slice(1) %>%
# ungroup() %>%
# select(cero_date = created_at_tweet, id) %>%
# filter(cero_date >= ymd("2019-01-01"))
#
# neda_change <- neda_hist %>%
# select(id_tweet, text, created_at_tweet, id) %>%
# inner_join(first_tweet) %>%
# mutate(days_tweet = interval(start = cero_date, end = created_at_tweet),
# days_tweet = round(time_length(days_tweet, unit = "days"))) %>%
# select(-cero_date) %>%
# filter(days_tweet >= -15, days_tweet <= 15)
#
# neda_timelapse <- neda_change %>%
# count(id, before_after = sign(days_tweet), sort = T) %>%
# mutate(before_after = case_when(before_after == -1 ~ "before",
# before_after == 1 ~ "after",
# TRUE ~ "cero")) %>%
# pivot_wider(values_from = n, names_from = before_after) %>%
# filter(before >= 15 & after >= 15) %>%
# select(-cero) %>%
# semi_join(x = neda_change, y = .)
read_rds(here::here("data", "neda_timelapse_twenty.rds")) %>%
inner_join(gender_output) -> neda_timelapse
library(tidytext)
replace_reg1 <- "https://t.co/[A-Za-z]\\d]+|"
replace_reg2 <- "https://t.co/[A-Za-z]\\d]+|&|<|>|RT|https"
replace_reg <- paste0(replace_reg1, replace_reg2)
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"
tidy_tweets <- neda_timelapse %>%
mutate(text = str_to_lower(text)) %>%
mutate(text = str_replace_all(text, replace_reg, "")) %>%
unnest_tokens(word, text, token = "regex", pattern = unnest_reg)
tidy_tweets %>%
filter(word %in% words_cat) %>%
mutate(days_tweet = case_when(days_tweet < 0 ~ "before",
days_tweet > 0 ~ "after",
TRUE ~ "cero")) %>%
filter(days_tweet != "cero") -> tidy_words
neda_liwc <- read_rds(here::here("data", "neda_liwc_twenty.rds")) %>%
select(-created_at_tweet) %>%
filter(abs(days_tweet) <= 15)
FEMALE
# female ------------------------------------------------------------------
# female -> 43
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 43)) %>%
pull(x_1) -> words_female
tidy_words %>%
filter(word %in% words_female) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_female) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| women |
816 |
323 |
110 |
383 |
| she |
489 |
181 |
67 |
241 |
| her |
431 |
234 |
51 |
146 |
| madam |
233 |
39 |
2 |
192 |
| girly |
-9 |
-4 |
-3 |
-2 |
| witch |
-9 |
-10 |
-2 |
3 |
| gal |
-10 |
-7 |
-2 |
-1 |
| gals |
-14 |
-5 |
0 |
-9 |
| lesbian |
-56 |
-24 |
-6 |
-26 |
RISK
# risk -> 85
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 85)) %>%
pull(x_1) -> words_risk
tidy_words %>%
filter(word %in% words_risk) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_risk) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| avoid |
92 |
39 |
19 |
34 |
| risk |
89 |
50 |
28 |
11 |
| threat |
51 |
32 |
7 |
12 |
| alarm |
11 |
4 |
2 |
5 |
| dread |
3 |
0 |
2 |
1 |
| doubt |
2 |
2 |
3 |
-3 |
| inhibit |
1 |
1 |
0 |
0 |
| unsure |
-3 |
0 |
-2 |
-1 |
RELIGION
# relig -> 114
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 114)) %>%
pull(x_1) -> words_relig
tidy_words %>%
filter(word %in% words_relig) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_relig) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| church |
31 |
20 |
9 |
2 |
| goddess |
4 |
2 |
0 |
2 |
| church |
31 |
20 |
9 |
2 |
| goddess |
4 |
2 |
0 |
2 |
MONEY
# money -> 113
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 113)) %>%
pull(x_1) -> words_money
tidy_words %>%
filter(word %in% words_money) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_money) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| donate |
10 |
6 |
1 |
3 |
| donation |
10 |
0 |
6 |
4 |
| donate |
10 |
6 |
1 |
3 |
| donation |
10 |
0 |
6 |
4 |
BODY
# body -> 71
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 114)) %>%
pull(x_1) -> words_body
tidy_words %>%
filter(word %in% words_body) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_body) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| church |
31 |
20 |
9 |
2 |
| goddess |
4 |
2 |
0 |
2 |
| church |
31 |
20 |
9 |
2 |
| goddess |
4 |
2 |
0 |
2 |
