Causal Impact
CAUSAL IMPACT BY GENDER
# Daily LIWC scores of the baseline sample, by gender.
# Averaging happens in two steps -- tweets within each user/day, then users
# within each gender/day -- so a user counts once per day however many tweets
# they posted.
ci_baseline <- baseline_liwc_gender %>%
  select(-text, -created_at, -id_tweet, -name) %>%
  # Step 1: one row per gender, user and day.
  group_by(gender, id, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  # Step 2: drop the user id and average users within gender and day.
  select(-id) %>%
  group_by(gender, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  # Long format: one row per gender, day and category column (fun ... filler).
  pivot_longer(
    cols = fun:filler,
    names_to = "categ",
    values_to = "values_baseline"
  )
# Daily LIWC scores of the NEDA sample, by gender, built the same way as the
# baseline series: average tweets within user/day, then users within
# gender/day.
ci_neda_liwc <- neda_liwc_gender %>%
  select(-id_tweet, -name, -name_proc) %>%
  # Step 1: one row per gender, user and day.
  group_by(gender, id, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  # Step 2: drop the user id and average users within gender and day.
  select(-id) %>%
  group_by(gender, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  # Long format: one row per gender, day and category column (fun ... filler).
  pivot_longer(
    fun:filler,
    names_to = "categ",
    values_to = "values_neda"
  )
# Pre/post windows handed to CausalImpact, expressed as row positions within
# each gender/category series.
# NOTE(review): presumably each series has 31 rows (days_tweet -15..15), which
# would put day 0 in the pre-period -- confirm this is intended.
pre_period <- c(1, 16)
post_period <- c(17, 31)

# Pair every NEDA value with the baseline value of the same gender, day and
# category. The keys are spelled out so that the join never silently picks up
# any other column the two tables might share.
d_second <- full_join(
  ci_baseline,
  ci_neda_liwc,
  by = c("gender", "days_tweet", "categ")
)
# Fit one CausalImpact model per gender x category.
# Within each nested data set the first column (values_neda) is the response
# and values_baseline is the control series.
ci <- d_second %>%
  # The periods are row positions and days_tweet is dropped below, so the rows
  # must be in day order here: full_join() appends rows that exist in only one
  # of the inputs at the end, which would otherwise break the time order.
  arrange(days_tweet) %>%
  select(gender, categ, values_neda, values_baseline) %>%
  group_by(categ, gender) %>%
  nest() %>%
  ungroup() %>%
  select(gender, categ, data) %>%
  mutate(mod = map(data, ~ CausalImpact::CausalImpact(
    .x,
    pre.period = pre_period,
    post.period = post_period
  )))
# Pull the `summary` element out of every fitted model and keep only the
# models that have one.
# NOTE(review): presumably a NULL summary means CausalImpact could not run
# inference for that series -- confirm.
ci_resul <- ci %>%
  mutate(summary_mod = map(mod, ~ pluck(.x, "summary"))) %>%
  filter(map_lgl(summary_mod, ~ !is.null(.x)))
# Extract the p-value and the relative effect of each model for the
# categories that are reported in the table.
# NOTE(review): element 1 of `p` and element 2 of `RelEffect` are presumably
# the "Average" and "Cumulative" rows of the CausalImpact summary -- confirm
# that mixing the two rows is intended.
sig_cat <- ci_resul %>%
  mutate(p = map_dbl(summary_mod, ~ pluck(.x, "p", 1))) %>%
  filter(
    categ %in% c(
      "female", "family", "anx", "relig", "money",
      "they", "achiev", "negate", "health", "power",
      "negemo", "informal", "ipron", "you", "discrep",
      "differ", "tentat", "posemo", "shehe", "affiliation"
    )
  ) %>%
  mutate(
    relative_effect = map_dbl(summary_mod, ~ pluck(.x, "RelEffect", 2))
  )
# sig_cat %>%
# filter(categ %in% sel_cat) -> sig_cat
# Keep only the reporting columns, ordered by gender and then from the largest
# to the smallest relative effect.
second_ap <- sig_cat %>%
  arrange(gender, desc(relative_effect)) %>%
  transmute(
    gender,
    categ,
    second_relative_effect = relative_effect,
    p_second = p
  )
# Render the per-gender relative effects (in %) and p-values as an HTML table.
# NOTE(review): the column headers assume pivot_wider() emits the genders in
# the order f, m, u (second_ap is sorted by gender) -- confirm.
second_ap %>%
  pivot_wider(
    id_cols = categ,
    values_from = c(second_relative_effect, p_second),
    names_from = gender
  ) %>%
  mutate(categ = str_to_title(categ)) %>%
  # Only the relative-effect columns start with "second"; the p-value columns
  # (p_second_*) are left on their original scale.
  mutate_at(vars(starts_with("second")), ~ . * 100) %>%
  mutate_if(is.numeric, ~ round(., digits = 3)) %>%
  mutate_at(vars(starts_with("second")), function(x) {
    # cell_spec() turns the numbers into text, so fix the number of decimals
    # here; otherwise trailing zeros are lost (3.6 next to 0.350).
    cell_spec(
      formatC(x, format = "f", digits = 3),
      "html",
      color = spec_color(x),
      bold = TRUE
    )
  }) %>%
  kable(
    "html",
    escape = FALSE,
    # One alignment letter per column (1 label + 6 numeric columns).
    align = "lrrrrrr",
    col.names = c(
      "Word Category",
      "FEMALE Relative Eff.(%)",
      "MALE Relative Eff.(%)",
      "UNKNOWN Relative Eff.(%)",
      "FEMALE P Value",
      "MALE P Value",
      "UNKNOWN P Value"
    )
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  )
|
Word Category
|
FEMALE Relative Eff.(%)
|
MALE Relative Eff.(%)
|
UNKNOWN Relative Eff.(%)
|
FEMALE P Value
|
MALE P Value
|
UNKNOWN P Value
|
|
Anx
|
13.196
|
1.808
|
10.928
|
0.001
|
0.313
|
0.005
|
|
Female
|
12.601
|
15.836
|
19.852
|
0.001
|
0.001
|
0.001
|
|
Family
|
7.904
|
1.151
|
14.691
|
0.012
|
0.350
|
0.001
|
|
Achiev
|
6.533
|
-1.29
|
0.71
|
0.004
|
0.289
|
0.384
|
|
Relig
|
5.685
|
-1.697
|
17.331
|
0.030
|
0.328
|
0.001
|
|
They
|
5.086
|
-1.85
|
3.6
|
0.005
|
0.291
|
0.057
|
|
Negemo
|
4.489
|
-4.545
|
-0.366
|
0.001
|
0.008
|
0.438
|
|
Money
|
4.037
|
16.973
|
5.994
|
0.035
|
0.001
|
0.021
|
|
Power
|
3.875
|
2.584
|
4.018
|
0.001
|
0.044
|
0.003
|
|
Negate
|
3.107
|
-2.063
|
3.02
|
0.015
|
0.158
|
0.104
|
|
Health
|
2.93
|
5.301
|
-4.463
|
0.070
|
0.060
|
0.009
|
|
Ipron
|
0.376
|
-1.107
|
-1.675
|
0.358
|
0.229
|
0.049
|
|
Informal
|
0.239
|
2.044
|
1.418
|
0.365
|
0.036
|
0.068
|
|
Tentat
|
-0.532
|
-1.518
|
-6.818
|
0.380
|
0.176
|
0.001
|
|
Differ
|
-1.323
|
-2.077
|
-0.054
|
0.137
|
0.070
|
0.485
|
|
Posemo
|
-2.319
|
-5.974
|
-1.489
|
0.005
|
0.001
|
0.092
|
|
You
|
-2.321
|
0.197
|
-3.912
|
0.021
|
0.451
|
0.002
|
|
Discrep
|
-3.547
|
-5.811
|
0.608
|
0.006
|
0.023
|
0.334
|
|
Affiliation
|
-5.274
|
-7.818
|
-7.205
|
0.004
|
0.001
|
0.011
|
|
Shehe
|
-7.708
|
-3.775
|
-5.058
|
0.023
|
0.221
|
0.050
|
Words inside categ:
The tables below show, for each category, the four words whose usage increased the most and the four whose usage decreased the most (words tied at the cut-off are all shown). The first column is the word itself. `changes_total` is the word's count in the “after” period minus its count in the “before” period, across all users. `changes_f`, `changes_m` and `changes_u` are the same difference restricted to female users, male users and users of unknown gender, respectively.
library(tidyverse)
# Category table of the LIWC dictionary file: after skipping the first line,
# the next 73 rows are read as (number, name) pairs.
categories_name <- read_delim(
  here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
  delim = "\t",
  skip = 1,
  n_max = 73,
  col_names = c("number", "name")
)

# Word table of the same file, starting after line 75: x_1 is the dictionary
# entry and x_2 ... x_11 are read as text columns holding category numbers.
# NOTE(review): "*" is stripped from the entries; if it marks a stem wildcard,
# only the bare stem can match in the exact word look-ups later -- confirm.
words_dic <- read_tsv(
  here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
  skip = 75,
  col_names = paste0(c("x"), c("_"), 1:11),
  col_types = "ccccccccccc",
  guess_max = 6000
) %>%
  mutate(x_1 = str_remove(x_1, "\\*"))

# Numbers of the categories whose individual words are inspected.
number_top_categ <- categories_name %>%
  filter(
    name %in% c("female", "family", "anx", "shehe", "affiliation", "friend")
  ) %>%
  pull(number)

# Every dictionary entry tagged with at least one of those categories.
words_cat <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. %in% number_top_categ)) %>%
  pull(x_1)
# neda_timelapse ----------------------------------------------------------
library(lubridate)
# Historical tweets: de-duplicated, restricted to 2018-03-01 onwards,
# lower-cased, and flagged when they mention a NEDA hashtag or account.
neda_hist <- read_rds(here::here("data", "NEDA_historical.rds")) %>%
  distinct() %>%
  # Compare calendar dates. ymd() returns a Date; if created_at_tweet is a
  # date-time (POSIXct), comparing the two directly makes base R compare
  # seconds with days, so the cutoff would not filter anything. as_date()
  # leaves a Date column unchanged.
  # NOTE(review): created_at_tweet is presumably POSIXct -- confirm.
  filter(as_date(created_at_tweet) >= ymd("2018-03-01")) %>%
  mutate(
    text = str_to_lower(text),
    neda_related = str_detect(
      text,
      pattern = "#nedawareness|#comeasyouare|@nedastaff"
    )
  )

# Output of the gender extractor: one row per user id with the raw name, the
# processed name and the inferred gender.
# NOTE(review): column types are guessed by read_tsv(); if `id` holds long
# numeric ids they may be parsed as doubles -- verify they match neda_hist$id.
gender_output <- read_tsv(
  here::here("data", "gender_extractor", "neda_liwc_gender_output.tsv"),
  col_names = c("id", "name", "name_proc", "gender")
)
# Date of each user's first NEDA-related tweet ("cero_date", i.e. day zero),
# kept only when that first tweet happened on or after 2019-01-01.
first_tweet <- neda_hist %>%
  select(created_at_tweet, text, id, id_tweet, neda_related) %>%
  filter(neda_related) %>%
  # Earliest NEDA-related tweet per user.
  arrange(created_at_tweet) %>%
  group_by(id) %>%
  slice(1) %>%
  ungroup() %>%
  select(cero_date = created_at_tweet, id) %>%
  # Compare calendar dates: a date-time compared directly with the Date from
  # ymd() would be compared as seconds vs. days in base R and the cutoff would
  # keep every row. as_date() leaves a Date column unchanged.
  filter(as_date(cero_date) >= ymd("2019-01-01"))
# Tweets of users with a day zero, restricted to the 15 days before and after
# it. days_tweet is the signed distance to the user's day zero, rounded to
# whole days.
neda_change <- neda_hist %>%
  select(id_tweet, text, created_at_tweet, id) %>%
  # Explicit key: `id` is the only column shared with first_tweet.
  inner_join(first_tweet, by = "id") %>%
  mutate(
    days_tweet = round(time_length(
      interval(start = cero_date, end = created_at_tweet),
      unit = "days"
    ))
  ) %>%
  select(-cero_date) %>%
  filter(days_tweet >= -15, days_tweet <= 15)
# Tweets of the users who posted at least 15 tweets before and at least 15
# tweets after their day zero, with the inferred gender attached.
neda_timelapse <- neda_change %>%
  semi_join(
    # Tweets per user before (-1), on (0) and after (1) day zero.
    neda_change %>%
      count(id, before_after = sign(days_tweet)) %>%
      mutate(before_after = case_when(
        before_after == -1 ~ "before",
        before_after == 1 ~ "after",
        TRUE ~ "cero"
      )) %>%
      pivot_wider(values_from = n, names_from = before_after) %>%
      # Users missing either period have NA here and are dropped as well.
      filter(before >= 15, after >= 15),
    # Only the user id decides which tweets are kept; the count columns are
    # ignored, so there is no need to drop the day-zero column first.
    by = "id"
  ) %>%
  inner_join(gender_output, by = "id")
library(tidytext)
# Patterns removed from the tweet text before tokenising.
# The t.co character class used to read "[A-Za-z]\\d]+" -- a stray "]" closed
# the class early, so the pattern required letter + digit + literal "]" and
# shortened links were never removed as a whole (only the bare "https"
# alternative matched, leaving the rest of the URL to be tokenised).
# NOTE(review): both pieces start with the same t.co alternative, and "RT" can
# only match if the text still contains upper-case letters -- confirm intent.
replace_reg1 <- "https://t.co/[A-Za-z\\d]+|"
replace_reg2 <- "https://t.co/[A-Za-z\\d]+|&|<|>|RT|https"
replace_reg <- paste0(replace_reg1, replace_reg2)
# Token separator: any character that is not a letter, digit, "_", "#", "@" or
# "'", plus an apostrophe that is not followed by one of those characters.
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"
# One row per token: lower-case the text, strip everything matched by
# replace_reg, then split the remainder on the unnest_reg separator pattern.
tidy_tweets <- neda_timelapse %>%
  mutate(
    text = str_replace_all(str_to_lower(text), replace_reg, "")
  ) %>%
  unnest_tokens(
    output = word,
    input = text,
    token = "regex",
    pattern = unnest_reg
  )
# Tokens that belong to the selected dictionary categories, labelled by period.
# Day-zero tokens are discarded; negative days are "before", positive days are
# "after".
tidy_words <- tidy_tweets %>%
  filter(word %in% words_cat) %>%
  filter(days_tweet != 0) %>%
  mutate(days_tweet = if_else(days_tweet < 0, "before", "after"))
# Stored LIWC scores, without the timestamp column and limited to the window
# of 15 days either side of day zero.
neda_liwc <- here::here("data", "neda_liwc.rds") %>%
  read_rds() %>%
  select(-created_at_tweet) %>%
  filter(between(days_tweet, -15, 15))
FEMALE CATEGORY
# female ------------------------------------------------------------------
# female -> 43
# Dictionary entries tagged with category 43 in any category column.
words_female <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 43)) %>%
  pull(x_1)

# Before/after counts of each word for all users (gender "t") and for each
# gender, turned into after-minus-before differences.
f <- tidy_words %>%
  filter(word %in% words_female) %>%
  {
    bind_rows(
      mutate(count(., days_tweet, word), gender = "t"),
      count(., gender, days_tweet, word)
    )
  } %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  # A word never seen in a period/gender cell counts as zero.
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(
    changes_total = after_t - before_t,
    changes_f = after_f - before_f,
    changes_m = after_m - before_m,
    changes_u = after_u - before_u
  ) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases; top_n() keeps ties, so
# either half can hold more than four rows.
f_top <- top_n(f, 4, wt = changes_total)
f_bottom <- top_n(f, -4, wt = changes_total)

bind_rows(f_top, f_bottom) %>%
  gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| women |
2117 |
896 |
404 |
817 |
| her |
320 |
265 |
-1 |
56 |
| she |
228 |
307 |
-27 |
-52 |
| woman |
207 |
82 |
34 |
91 |
| gf |
-30 |
-19 |
-7 |
-4 |
| wife |
-78 |
-37 |
-33 |
-8 |
| queen |
-96 |
3 |
-32 |
-67 |
| lady |
-158 |
-112 |
-21 |
-25 |
FAMILY
# family ------------------------------------------------------------------
# family -> 41
# Dictionary entries tagged with category 41 in any category column.
words_family <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 41)) %>%
  pull(x_1)

# Before/after counts of each word for all users (gender "t") and for each
# gender, turned into after-minus-before differences.
f <- tidy_words %>%
  filter(word %in% words_family) %>%
  {
    bind_rows(
      mutate(count(., days_tweet, word), gender = "t"),
      count(., gender, days_tweet, word)
    )
  } %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  # A word never seen in a period/gender cell counts as zero.
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(
    changes_total = after_t - before_t,
    changes_f = after_f - before_f,
    changes_m = after_m - before_m,
    changes_u = after_u - before_u
  ) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases; top_n() keeps ties, so
# either half can hold more than four rows.
f_top <- top_n(f, 4, wt = changes_total)
f_bottom <- top_n(f, -4, wt = changes_total)

bind_rows(f_top, f_bottom) %>%
  gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| ma |
132 |
52 |
54 |
26 |
| family |
122 |
98 |
14 |
10 |
| daughter |
115 |
39 |
24 |
52 |
| families |
88 |
49 |
22 |
17 |
| mama |
-19 |
-23 |
0 |
4 |
| pa |
-19 |
-14 |
7 |
-12 |
| daddy |
-23 |
-9 |
-6 |
-8 |
| bro |
-63 |
7 |
31 |
-101 |
| wife |
-78 |
-37 |
-33 |
-8 |
ANXIETY
# anxiety -----------------------------------------------------------------
# anxiety -> 33
# Dictionary entries tagged with category 33 in any category column.
words_anxiety <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 33)) %>%
  pull(x_1)

# Before/after counts of each word for all users (gender "t") and for each
# gender, turned into after-minus-before differences.
f <- tidy_words %>%
  filter(word %in% words_anxiety) %>%
  {
    bind_rows(
      mutate(count(., days_tweet, word), gender = "t"),
      count(., gender, days_tweet, word)
    )
  } %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  # A word never seen in a period/gender cell counts as zero.
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(
    changes_total = after_t - before_t,
    changes_f = after_f - before_f,
    changes_m = after_m - before_m,
    changes_u = after_u - before_u
  ) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases; top_n() keeps ties, so
# either half can hold more than four rows.
f_top <- top_n(f, 4, wt = changes_total)
f_bottom <- top_n(f, -4, wt = changes_total)

bind_rows(f_top, f_bottom) %>%
  gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| risk |
190 |
15 |
115 |
60 |
| stress |
78 |
38 |
13 |
27 |
| upset |
73 |
30 |
13 |
30 |
| worried |
70 |
31 |
15 |
24 |
| confuse |
-10 |
-6 |
1 |
-5 |
| horrible |
-11 |
-5 |
1 |
-7 |
| doubt |
-21 |
-11 |
-1 |
-9 |
| scared |
-31 |
16 |
-12 |
-35 |
