Causal Impact
CAUSAL IMPACT BY GENDER
# Per-gender CausalImpact analysis of LIWC categories (code as echoed in the
# rendered report).
# Baseline sample: mean score per user and day, then mean across users for
# every gender/day pair, reshaped to one row per gender, day and category.
baseline_liwc_gender %>%
select(-text, -created_at, -id_tweet, -name) %>%
group_by(gender, id, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
select(-id) %>%
group_by(gender, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
pivot_longer(cols = fun:filler, names_to = "categ", values_to = "values_baseline") -> ci_baseline
# NEDA sample, aggregated in the same two steps.
neda_liwc_gender %>%
select(-id_tweet, -name, -name_proc) %>%
group_by(gender, id, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
select(-id) %>%
group_by(gender, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
pivot_longer(fun:filler, names_to = "categ",
values_to = "values_neda") -> ci_neda_liwc
# Row positions handed to CausalImpact as pre- and post-intervention periods.
# NOTE(review): presumably rows 1-16 are days -15..0 and rows 17-31 are days
# 1..15; this assumes one row per day in every series -- confirm.
pre_period <- c(1, 16)
post_period <- c(17, 31)
# Joined on all shared columns; unmatched rows from either side are kept.
ci_baseline %>%
full_join(ci_neda_liwc) -> d_second
# Fit one model per category/gender pair. `values_neda` is the first column of
# each nested data frame and `values_baseline` the second.
# NOTE(review): `days_tweet` is dropped before nesting, so the series order is
# whatever row order `d_second` happens to have -- confirm it is chronological.
d_second %>%
# count(gender, days_tweet) %>%
# view()
# view()
# select(categ, values_neda, values_baseline) %>%
# nest(data = - categ) %>%
select(gender, categ, values_neda, values_baseline) %>%
# filter(gender == "m") %>% view()
group_by(categ, gender) %>%
nest() %>%
ungroup() %>%
select(gender, categ, data) %>%
mutate(mod = map(data, ~CausalImpact::CausalImpact(.,
pre_period,
post_period))) -> ci
# Keep only the models that carry a `summary` element.
ci %>%
mutate(summary_mod = map(mod, "summary")) %>%
filter(!map_lgl(summary_mod, is.null)) -> ci_resul
# Take the first entry of `p` and the second entry of `RelEffect` from each
# summary, restricted to a fixed list of categories.
ci_resul %>%
mutate(p = map(summary_mod, "p")) %>%
mutate(p = map_dbl(p, 1)) %>%
filter(categ %in% c("female",
"family",
"anx",
"relig",
"money",
"they",
"achiev",
"negate",
"health",
"power",
"negemo",
"informal",
"ipron",
"you",
"discrep",
"differ",
"tentat",
"posemo",
"shehe",
"affiliation")) %>%
mutate(relative_effect = map(summary_mod, "RelEffect")) %>%
mutate(relative_effect = map_dbl(relative_effect, 2))-> sig_cat
# sig_cat %>%
# filter(categ %in% sel_cat) -> sig_cat
sig_cat %>%
arrange(gender, desc(relative_effect)) %>%
select(gender, categ,
second_relative_effect = relative_effect, p_second = p) -> second_ap
# One row per category with effect and p-value columns per gender; effects are
# scaled to percent, rounded to 3 digits and colour-coded.
# NOTE(review): the column labels are positional and assume the gender columns
# come out in the order female, male, unknown -- confirm.
second_ap %>%
pivot_wider(id_cols = categ,
values_from = c(second_relative_effect, p_second),
names_from = gender) %>%
mutate(categ = str_to_title(categ)) %>%
mutate_at(vars(starts_with("second")), ~.*100) %>%
mutate_if(is.numeric, ~round(., digits = 3)) %>%
mutate_at(vars(starts_with("second")), function(x){
cell_spec(x, "html", color = spec_color(x), bold = T)
}) %>%
kable("html", escape = F,
align = "lrr",
col.names = c("Word Category", "FEMALE Relative Eff.(%)",
"MALE Relative Eff.(%)",
"UNKNOWN Relative Eff.(%)",
"FEMALE P Value",
"MALE P Value",
"UNKNOWN P Value")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE)
|
Word Category
|
FEMALE Relative Eff.(%)
|
MALE Relative Eff.(%)
|
UNKNOWN Relative Eff.(%)
|
FEMALE P Value
|
MALE P Value
|
UNKNOWN P Value
|
|
Anx
|
13.196
|
1.808
|
10.928
|
0.001
|
0.313
|
0.005
|
|
Female
|
12.601
|
15.836
|
19.852
|
0.001
|
0.001
|
0.001
|
|
Family
|
7.904
|
1.151
|
14.691
|
0.012
|
0.350
|
0.001
|
|
Achiev
|
6.533
|
-1.29
|
0.71
|
0.004
|
0.289
|
0.384
|
|
Relig
|
5.685
|
-1.697
|
17.331
|
0.030
|
0.328
|
0.001
|
|
They
|
5.086
|
-1.85
|
3.6
|
0.005
|
0.291
|
0.057
|
|
Negemo
|
4.489
|
-4.545
|
-0.366
|
0.001
|
0.008
|
0.438
|
|
Money
|
4.037
|
16.973
|
5.994
|
0.035
|
0.001
|
0.021
|
|
Power
|
3.875
|
2.584
|
4.018
|
0.001
|
0.044
|
0.003
|
|
Negate
|
3.107
|
-2.063
|
3.02
|
0.015
|
0.158
|
0.104
|
|
Health
|
2.93
|
5.301
|
-4.463
|
0.070
|
0.060
|
0.009
|
|
Ipron
|
0.376
|
-1.107
|
-1.675
|
0.358
|
0.229
|
0.049
|
|
Informal
|
0.239
|
2.044
|
1.418
|
0.365
|
0.036
|
0.068
|
|
Tentat
|
-0.532
|
-1.518
|
-6.818
|
0.380
|
0.176
|
0.001
|
|
Differ
|
-1.323
|
-2.077
|
-0.054
|
0.137
|
0.070
|
0.485
|
|
Posemo
|
-2.319
|
-5.974
|
-1.489
|
0.005
|
0.001
|
0.092
|
|
You
|
-2.321
|
0.197
|
-3.912
|
0.021
|
0.451
|
0.002
|
|
Discrep
|
-3.547
|
-5.811
|
0.608
|
0.006
|
0.023
|
0.334
|
|
Affiliation
|
-5.274
|
-7.818
|
-7.205
|
0.004
|
0.001
|
0.011
|
|
Shehe
|
-7.708
|
-3.775
|
-5.058
|
0.023
|
0.221
|
0.050
|
Words inside categ:
The tables below show, for each category, the 4 words whose counts increased the most and the 4 whose counts decreased the most. The first column is the word itself. changes_total is the word's count after day 0 minus its count before day 0, over all users. changes_f, changes_m and changes_u are the same difference restricted to female, male and unknown-gender users, respectively.
# Data preparation for the word-level tables (code as echoed in the rendered
# report).
library(tidyverse)
# Category index of the LIWC dictionary file: skips the first line and reads
# up to 73 tab-separated rows as number/name pairs.
read_delim(here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
delim = "\t", skip = 1, col_names = c("number", "name"),
n_max = 73) -> categories_name
# Word rows of the same file, read as 11 character columns; `x_1` is used as
# the word below and has its first "*" removed.
read_tsv(here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
skip = 75, col_names = paste0(c("x"), c("_"), 1:11),
guess_max = 6000, col_types = "ccccccccccc") %>%
mutate(x_1 = str_remove(x_1, "\\*")) -> words_dic
# Numbers of the categories of interest, then every dictionary word tagged
# with at least one of them.
categories_name %>%
filter(name %in% c("female", "family", "anx", "shehe", "affiliation", "friend")) %>%
pull(number) -> number_top_categ
words_dic %>%
filter_at(vars(-x_1), any_vars(. %in% number_top_categ)) %>%
pull(x_1) -> words_cat
# neda_timelapse ----------------------------------------------------------
library(lubridate)
# Tweets from the cut-off date on, lower-cased, flagged when they mention one
# of the campaign hashtags or the @nedastaff handle.
neda_hist <- read_rds(here::here("data", "NEDA_historical.rds")) %>%
distinct() %>%
filter(created_at_tweet >= ymd("2018-03-01")) %>%
mutate(text = str_to_lower(text),
neda_related = str_detect(text,
pattern = "#nedawareness|#comeasyouare|@nedastaff"))
gender_output <- read_tsv(here::here("data", "gender_extractor",
"neda_liwc_gender_output.tsv"),
col_names = c("id", "name", "name_proc", "gender"))
# Earliest flagged tweet per user; its timestamp becomes that user's day 0.
first_tweet <- neda_hist %>%
select(created_at_tweet, text, id, id_tweet, neda_related) %>%
filter(neda_related) %>%
arrange(created_at_tweet) %>%
group_by(id) %>%
slice(1) %>%
ungroup() %>%
select(cero_date = created_at_tweet, id) %>%
filter(cero_date >= ymd("2019-01-01"))
# Days between each tweet and its author's day 0 (rounded), keeping tweets
# within 15 days on either side.
neda_change <- neda_hist %>%
select(id_tweet, text, created_at_tweet, id) %>%
inner_join(first_tweet) %>%
mutate(days_tweet = interval(start = cero_date, end = created_at_tweet),
days_tweet = round(time_length(days_tweet, unit = "days"))) %>%
select(-cero_date) %>%
filter(days_tweet >= -15, days_tweet <= 15)
# Keep only users with at least 15 tweets before and at least 15 after day 0.
neda_timelapse <- neda_change %>%
count(id, before_after = sign(days_tweet), sort = T) %>%
mutate(before_after = case_when(before_after == -1 ~ "before",
before_after == 1 ~ "after",
TRUE ~ "cero")) %>%
pivot_wider(values_from = n, names_from = before_after) %>%
filter(before >= 15 & after >= 15) %>%
select(-cero) %>%
semi_join(x = neda_change, y = .)
# Attach the gender columns to every remaining tweet.
neda_timelapse %>%
inner_join(gender_output) -> neda_timelapse
library(tidytext)
# Patterns stripped from the text before tokenising.
# NOTE(review): in `[A-Za-z]\\d]+` the character class closes after `z`, so the
# pattern matches one letter, one digit and then one or more `]` rather than a
# run of letters and digits -- probably meant `[A-Za-z\\d]+`; confirm.
replace_reg1 <- "https://t.co/[A-Za-z]\\d]+|"
replace_reg2 <- "https://t.co/[A-Za-z]\\d]+|&|<|>|RT|https"
replace_reg <- paste0(replace_reg1, replace_reg2)
# Token separator: any character other than letters, digits, "_", "#", "@" and
# "'", plus an apostrophe that is not followed by one of those characters.
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"
tidy_tweets <- neda_timelapse %>%
mutate(text = str_to_lower(text)) %>%
mutate(text = str_replace_all(text, replace_reg, "")) %>%
unnest_tokens(word, text, token = "regex", pattern = unnest_reg)
# Dictionary words only, labelled "before"/"after" day 0; day-0 tokens are
# dropped.
tidy_tweets %>%
filter(word %in% words_cat) %>%
mutate(days_tweet = case_when(days_tweet < 0 ~ "before",
days_tweet > 0 ~ "after",
TRUE ~ "cero")) %>%
filter(days_tweet != "cero") -> tidy_words
neda_liwc <- read_rds(here::here("data", "neda_liwc.rds")) %>%
select(-created_at_tweet) %>%
filter(abs(days_tweet) <= 15)
FEMALE CATEGORY
# female ------------------------------------------------------------------
# female -> 43
# Dictionary words tagged with category 43.
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 43)) %>%
pull(x_1) -> words_female
# Count each word before and after day 0, for all users (gender "t") and per
# gender, then compute after-minus-before differences.
tidy_words %>%
filter(word %in% words_female) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_female) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
# Largest increases and largest decreases; `top_n()` keeps ties, so either
# half can hold more than 4 rows.
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| women |
2117 |
896 |
404 |
817 |
| her |
320 |
265 |
-1 |
56 |
| she |
228 |
307 |
-27 |
-52 |
| woman |
207 |
82 |
34 |
91 |
| gf |
-30 |
-19 |
-7 |
-4 |
| wife |
-78 |
-37 |
-33 |
-8 |
| queen |
-96 |
3 |
-32 |
-67 |
| lady |
-158 |
-112 |
-21 |
-25 |
FAMILY
# family -> 41
# Dictionary words tagged with category 41.
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 41)) %>%
pull(x_1) -> words_family
# Count each word before and after day 0, for all users (gender "t") and per
# gender, then compute after-minus-before differences.
tidy_words %>%
filter(word %in% words_family) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_family) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
# Largest increases and largest decreases; `top_n()` keeps ties, so either
# half can hold more than 4 rows.
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| ma |
132 |
52 |
54 |
26 |
| family |
122 |
98 |
14 |
10 |
| daughter |
115 |
39 |
24 |
52 |
| families |
88 |
49 |
22 |
17 |
| mama |
-19 |
-23 |
0 |
4 |
| pa |
-19 |
-14 |
7 |
-12 |
| daddy |
-23 |
-9 |
-6 |
-8 |
| bro |
-63 |
7 |
31 |
-101 |
| wife |
-78 |
-37 |
-33 |
-8 |
ANXIETY
# anxiety -----------------------------------------------------------------
# anxiety -> 33
# Dictionary words tagged with category 33.
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 33)) %>%
pull(x_1) -> words_anxiety
# Count each word before and after day 0, for all users (gender "t") and per
# gender, then compute after-minus-before differences.
tidy_words %>%
filter(word %in% words_anxiety) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_anxiety) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
# Largest increases and largest decreases; `top_n()` keeps ties, so either
# half can hold more than 4 rows.
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word |
changes_total |
changes_f |
changes_m |
changes_u |
| risk |
190 |
15 |
115 |
60 |
| stress |
78 |
38 |
13 |
27 |
| upset |
73 |
30 |
13 |
30 |
| worried |
70 |
31 |
15 |
24 |
| confuse |
-10 |
-6 |
1 |
-5 |
| horrible |
-11 |
-5 |
1 |
-7 |
| doubt |
-21 |
-11 |
-1 |
-9 |
| scared |
-31 |
16 |
-12 |
-35 |
---
title: "Causal Impact" 
clean: true
output:
  bookdown::html_document2:
    number_sections: false
    code_download: true
    code_folding: hide
    self_contained: true
    toc: true
    toc_float: false
---

```{r setup, include=FALSE}
# Chunk defaults for the whole report: show the code, hide warnings and
# messages.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

# Attach the packages used throughout the document.
xfun::pkg_attach("tidyverse", "lubridate", "kableExtra")

# Default ggplot2 theme for any plot drawn later in the session.
theme_set(theme_linedraw())
```

## Causal Impact 

```{r eval=FALSE, include=FALSE}
# Per-tweet LIWC scores of the baseline sample.
baseline_liwc <- read_rds(here::here("data", "baseline_liwc.rds"))

# Per-tweet LIWC scores of the NEDA sample, without the raw text and
# timestamp, restricted to tweets within 15 days of day 0.
# The pipeline used to end in a dangling `%>%`, which piped the data frame
# into the `sel_cat` assignment below and made the statement fail when run.
neda_liwc <- read_rds(here::here("data", "neda_liwc.rds")) %>%
  select(-text, -created_at_tweet) %>%
  filter(abs(days_tweet) <= 15)

# LIWC categories that are kept when the results are reported.
sel_cat <- c(
  "i", "we", "you", "shehe", "they", "ipron", "negate", "compare",
  "posemo", "negemo", "anx", "anger", "sad", "social", "family", "friend",
  "female", "male", "insight", "cause", "discrep", "tentat", "certain",
  "differ", "see", "hear", "feel", "body", "health", "sexual", "ingest",
  "affiliation", "achiev", "power", "reward", "risk", "focuspast",
  "focuspresent", "focusfuture", "relativ", "work", "leisure", "home",
  "money", "relig", "death", "informal", "swear", "assent", "nonflu",
  "filler"
)
```

```{r eval=FALSE, include=FALSE}
# CausalImpact analysis of LIWC categories with all users pooled.

# Baseline sample: mean score per user and day, then mean across users per
# day (every user contributes equally), one row per day and LIWC category.
ci_baseline <- baseline_liwc %>%
  select(-text, -created_at, -id_tweet) %>%
  group_by(days_tweet, id) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(cols = fun:filler, names_to = "categ",
               values_to = "values_baseline")

# NEDA sample, aggregated in the same two steps.
ci_neda_liwc <- neda_liwc %>%
  select(-id_tweet) %>%
  group_by(days_tweet, id) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(-days_tweet, names_to = "categ", values_to = "values_neda")

# Row positions of the pre- and post-intervention periods inside each series.
# NOTE(review): assumes one row per day from -15 to 15, i.e. rows 1-16 are
# days -15..0 and rows 17-31 are days 1..15 -- confirm.
pre_period <- c(1, 16)
post_period <- c(17, 31)

# Explicit keys instead of relying on whichever columns happen to be shared.
d_second <- ci_baseline %>%
  inner_join(ci_neda_liwc, by = c("days_tweet", "categ"))

# One model per category. `days_tweet` is dropped before nesting, so the rows
# are sorted by day first to guarantee that each series is chronological.
# `values_neda` (first column) is the response, `values_baseline` the control.
ci <- d_second %>%
  arrange(days_tweet) %>%
  select(categ, values_neda, values_baseline) %>%
  nest(data = -categ) %>%
  mutate(mod = map(data, ~ CausalImpact::CausalImpact(., pre_period,
                                                      post_period)))

# Keep only the models that carry a `summary` element.
ci_resul <- ci %>%
  mutate(summary_mod = map(mod, "summary")) %>%
  filter(!map_lgl(summary_mod, is.null))

# First entry of `p` and second entry of `RelEffect` from each summary,
# keeping categories with p < 0.05.
sig_cat <- ci_resul %>%
  mutate(p = map(summary_mod, "p")) %>%
  mutate(p = map_dbl(p, 1)) %>%
  filter(p < 0.05) %>%
  mutate(relative_effect = map(summary_mod, "RelEffect")) %>%
  mutate(relative_effect = map_dbl(relative_effect, 2))

sig_cat <- sig_cat %>%
  filter(categ %in% sel_cat)

second_ap <- sig_cat %>%
  arrange(desc(relative_effect)) %>%
  select(categ, second_relative_effect = relative_effect, p_second = p)

# Effects as percentages, rounded to 3 digits and colour-coded.
second_ap %>%
  mutate(categ = str_to_title(categ)) %>%
  mutate_at(vars(second_relative_effect), ~ . * 100) %>%
  mutate_if(is.numeric, ~ round(., digits = 3)) %>%
  mutate_at(vars(second_relative_effect), function(x) {
    cell_spec(x, "html", color = spec_color(x), bold = TRUE)
  }) %>%
  kable("html", escape = FALSE,
        align = "lrr",
        col.names = c("Word Category", "Relative Eff.(%)",
                      "P Value")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
```

## CAUSAL IMPACT BY GENDER

```{r echo=FALSE}
# Per-tweet LIWC scores of the baseline sample, with a gender column.
baseline_liwc_gender <- here::here("data", "baseline_liwc_gender.rds") %>%
  read_rds()

# Same for the NEDA sample, restricted to tweets within 15 days of day 0.
# (Dropping `text` and `created_at_tweet` is intentionally left disabled.)
neda_liwc_gender <- here::here("data", "neda_liwc_gender.rds") %>%
  read_rds() %>%
  filter(abs(days_tweet) <= 15)

# LIWC category names, one quoted entry per name.
sel_cat <- c(
  "i", "we", "you", "shehe", "they", "ipron", "negate", "compare",
  "posemo", "negemo", "anx", "anger", "sad", "social", "family", "friend",
  "female", "male", "insight", "cause", "discrep", "tentat", "certain",
  "differ", "see", "hear", "feel", "body", "health", "sexual", "ingest",
  "affiliation", "achiev", "power", "reward", "risk", "focuspast",
  "focuspresent", "focusfuture", "relativ", "work", "leisure", "home",
  "money", "relig", "death", "informal", "swear", "assent", "nonflu",
  "filler"
)
```

```{r }
# CausalImpact analysis of LIWC categories, fitted separately per gender.

# Baseline sample: mean score per user and day, then mean across users for
# every gender/day pair; one row per gender, day and LIWC category.
ci_baseline <- baseline_liwc_gender %>%
  select(-text, -created_at, -id_tweet, -name) %>%
  group_by(gender, id, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(gender, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(cols = fun:filler, names_to = "categ",
               values_to = "values_baseline")

# NEDA sample, aggregated in the same two steps.
ci_neda_liwc <- neda_liwc_gender %>%
  select(-id_tweet, -name, -name_proc) %>%
  group_by(gender, id, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(gender, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(fun:filler, names_to = "categ", values_to = "values_neda")

# Row positions of the pre- and post-intervention periods inside each series.
# NOTE(review): assumes one row per day from -15 to 15 for every gender, i.e.
# rows 1-16 are days -15..0 and rows 17-31 are days 1..15 -- confirm.
pre_period <- c(1, 16)
post_period <- c(17, 31)

# Explicit keys instead of relying on whichever columns happen to be shared.
d_second <- ci_baseline %>%
  full_join(ci_neda_liwc, by = c("gender", "days_tweet", "categ"))

# One model per category/gender pair. A full join appends rows found only in
# the NEDA sample after all matched rows, and `days_tweet` is dropped before
# nesting, so the rows are sorted by day first to keep every series
# chronological. `values_neda` (first column) is the response,
# `values_baseline` the control series.
ci <- d_second %>%
  arrange(days_tweet) %>%
  select(gender, categ, values_neda, values_baseline) %>%
  group_by(categ, gender) %>%
  nest() %>%
  ungroup() %>%
  select(gender, categ, data) %>%
  mutate(mod = map(data, ~ CausalImpact::CausalImpact(., pre_period,
                                                      post_period)))

# Keep only the models that carry a `summary` element.
ci_resul <- ci %>%
  mutate(summary_mod = map(mod, "summary")) %>%
  filter(!map_lgl(summary_mod, is.null))

# First entry of `p` and second entry of `RelEffect` from each summary,
# restricted to the categories shown in the report.
sig_cat <- ci_resul %>%
  mutate(p = map(summary_mod, "p")) %>%
  mutate(p = map_dbl(p, 1)) %>%
  filter(categ %in% c("female",
                      "family",
                      "anx",
                      "relig",
                      "money",
                      "they",
                      "achiev",
                      "negate",
                      "health",
                      "power",
                      "negemo",
                      "informal",
                      "ipron",
                      "you",
                      "discrep",
                      "differ",
                      "tentat",
                      "posemo",
                      "shehe",
                      "affiliation")) %>%
  mutate(relative_effect = map(summary_mod, "RelEffect")) %>%
  mutate(relative_effect = map_dbl(relative_effect, 2))

second_ap <- sig_cat %>%
  arrange(gender, desc(relative_effect)) %>%
  select(gender, categ,
         second_relative_effect = relative_effect, p_second = p)

# One row per category with one effect and one p-value column per gender.
# Effects are shown as percentages, rounded to 3 digits and colour-coded.
# `align` now names all seven columns; "lrr" was recycled and left-aligned two
# of the numeric columns.
# NOTE(review): the column labels are positional and assume the gender columns
# come out in the order female, male, unknown -- confirm.
second_ap %>%
  pivot_wider(id_cols = categ,
              values_from = c(second_relative_effect, p_second),
              names_from = gender) %>%
  mutate(categ = str_to_title(categ)) %>%
  mutate_at(vars(starts_with("second")), ~ . * 100) %>%
  mutate_if(is.numeric, ~ round(., digits = 3)) %>%
  mutate_at(vars(starts_with("second")), function(x) {
    cell_spec(x, "html", color = spec_color(x), bold = TRUE)
  }) %>%
  kable("html", escape = FALSE,
        align = "lrrrrrr",
        col.names = c("Word Category", "FEMALE Relative Eff.(%)",
                      "MALE Relative Eff.(%)",
                      "UNKNOWN Relative Eff.(%)",
                      "FEMALE P Value",
                      "MALE P Value",
                      "UNKNOWN P Value")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
```

## Words inside categ:

The tables below show, for each category, the 4 words whose counts increased the most and the 4 whose counts decreased the most. The first column is the word itself. `changes_total` is the word's count after day 0 minus its count before day 0, over all users. `changes_f`, `changes_m` and `changes_u` are the same difference restricted to female, male and unknown-gender users, respectively.

```{r}
library(tidyverse)

# Category index of the LIWC dictionary file: skip the first line, then read
# up to 73 tab-separated rows as number/name pairs.
categories_name <- read_delim(
  here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
  delim = "\t",
  skip = 1,
  n_max = 73,
  col_names = c("number", "name")
)

# Word rows of the same file, read as 11 character columns. `x_1` is used as
# the word; its first "*" is removed.
# NOTE(review): with the "*" gone, words are later matched as exact tokens
# only -- confirm that prefix matching is not expected.
words_dic <- read_tsv(
  here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
  skip = 75,
  col_names = paste0("x_", 1:11),
  guess_max = 6000,
  col_types = "ccccccccccc"
) %>%
  mutate(x_1 = str_remove(x_1, "\\*"))

# Numbers of the categories of interest.
number_top_categ <- categories_name %>%
  filter(name %in% c("female", "family", "anx", "shehe", "affiliation", "friend")) %>%
  pull(number)

# Every dictionary word tagged with at least one of those categories.
words_cat <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. %in% number_top_categ)) %>%
  pull(x_1)


# neda_timelapse ----------------------------------------------------------

library(lubridate)

# Tweets from the cut-off date on, lower-cased, flagged when they mention one
# of the campaign hashtags or the @nedastaff handle.
# The timestamp is converted with `as_date()` before the comparison so that
# both sides are dates.
# NOTE(review): if `created_at_tweet` is a POSIXct, comparing it directly with
# the Date returned by `ymd()` compares seconds with days and the filter keeps
# every row -- confirm the column type.
neda_hist <- read_rds(here::here("data", "NEDA_historical.rds")) %>%
  distinct() %>%
  filter(as_date(created_at_tweet) >= ymd("2018-03-01")) %>%
  mutate(text = str_to_lower(text),
         neda_related = str_detect(text,
                                   pattern = "#nedawareness|#comeasyouare|@nedastaff"))

# Gender columns per user, read from a header-less TSV.
gender_output <- read_tsv(here::here("data", "gender_extractor",
                                     "neda_liwc_gender_output.tsv"),
                          col_names = c("id", "name", "name_proc", "gender"))

# Earliest flagged tweet per user; its timestamp is that user's day 0
# (`cero_date`). Users whose day 0 falls before 2019 are dropped.
first_tweet <- neda_hist %>%
  select(created_at_tweet, text, id, id_tweet, neda_related) %>%
  filter(neda_related) %>%
  arrange(created_at_tweet) %>%
  group_by(id) %>%
  slice(1) %>%
  ungroup() %>%
  select(cero_date = created_at_tweet, id) %>%
  filter(as_date(cero_date) >= ymd("2019-01-01"))

# Days between each tweet and its author's day 0 (rounded), keeping tweets
# within 15 days on either side.
neda_change <- neda_hist %>%
  select(id_tweet, text, created_at_tweet, id) %>%
  inner_join(first_tweet, by = "id") %>%
  mutate(days_tweet = interval(start = cero_date, end = created_at_tweet),
         days_tweet = round(time_length(days_tweet, unit = "days"))) %>%
  select(-cero_date) %>%
  filter(days_tweet >= -15, days_tweet <= 15)

# Keep only users with at least 15 tweets before and at least 15 after day 0,
# then attach the gender columns to every remaining tweet.
neda_timelapse <- neda_change %>%
  semi_join(
    neda_change %>%
      count(id, before_after = sign(days_tweet), sort = TRUE) %>%
      mutate(before_after = case_when(before_after == -1 ~ "before",
                                      before_after == 1 ~ "after",
                                      TRUE ~ "cero")) %>%
      pivot_wider(values_from = n, names_from = before_after) %>%
      filter(before >= 15 & after >= 15) %>%
      select(-cero),
    by = "id"
  ) %>%
  inner_join(gender_output, by = "id")

library(tidytext)

# Patterns stripped from the text before tokenising: t.co links, HTML
# entities, the retweet marker and any leftover "https".
# Fixes relative to the previous patterns:
#  * `[A-Za-z]\\d]+` closed the character class after `z`, so it matched one
#    letter, one digit and a run of `]` instead of a link id; it is now
#    `[A-Za-z\\d]+`.
#  * the link pattern was listed twice; it is kept once.
#  * `RT` could never match because the text is lower-cased first; it is now
#    `\\brt\\b`, with word boundaries so words containing "rt" stay intact.
replace_reg1 <- "https://t.co/[A-Za-z\\d]+|"
replace_reg2 <- "&amp;|&lt;|&gt;|\\brt\\b|https"
replace_reg <- paste0(replace_reg1, replace_reg2)

# Token separator: any character other than letters, digits, "_", "#", "@" and
# "'", plus an apostrophe that is not followed by one of those characters.
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

# One row per token, keeping the tweet-level columns.
tidy_tweets <- neda_timelapse %>%
  mutate(text = str_to_lower(text)) %>%
  mutate(text = str_replace_all(text, replace_reg, "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = unnest_reg)

# Tokens that belong to the selected LIWC categories, labelled by whether the
# tweet was posted before or after day 0; tokens from day 0 are left out.
tidy_words <- tidy_tweets %>%
  filter(word %in% words_cat, days_tweet != 0) %>%
  mutate(days_tweet = if_else(days_tweet < 0, "before", "after"))

# Per-tweet LIWC scores within 15 days of day 0, without the timestamp.
neda_liwc <- here::here("data", "neda_liwc.rds") %>%
  read_rds() %>%
  filter(abs(days_tweet) <= 15) %>%
  select(-created_at_tweet)

```

### FEMALE CATEGORY

```{r}
# female ------------------------------------------------------------------
# female -> 43

# Dictionary words tagged with category 43.
words_female <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 43)) %>%
  pull(x_1)

# Count each word before and after day 0, for all users (gender "t") and per
# gender, then compute after-minus-before differences, largest increase first.
f <- tidy_words %>%
  filter(word %in% words_female) %>%
  count(days_tweet, word) %>%
  mutate(gender = "t") %>%
  bind_rows(tidy_words %>%
              filter(word %in% words_female) %>%
              count(gender, days_tweet, word)) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# `f` is already sorted, so the first and last four rows are the largest
# increases and decreases. Unlike `top_n()`, this never returns extra rows on
# ties, so the table always matches the "4 + 4" description.
f_top <- head(f, 4)
f_bottom <- tail(f, 4)

# `distinct()` avoids repeating a word when fewer than eight are available.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()
```

### FAMILY

```{r}
# family ------------------------------------------------------------------
# family -> 41

# Dictionary words tagged with category 41.
words_family <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 41)) %>%
  pull(x_1)

# Count each word before and after day 0, for all users (gender "t") and per
# gender, then compute after-minus-before differences, largest increase first.
f <- tidy_words %>%
  filter(word %in% words_family) %>%
  count(days_tweet, word) %>%
  mutate(gender = "t") %>%
  bind_rows(tidy_words %>%
              filter(word %in% words_family) %>%
              count(gender, days_tweet, word)) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# `f` is already sorted, so the first and last four rows are the largest
# increases and decreases. Unlike `top_n()`, this never returns extra rows on
# ties, so the table always matches the "4 + 4" description.
f_top <- head(f, 4)
f_bottom <- tail(f, 4)

# `distinct()` avoids repeating a word when fewer than eight are available.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()
```

### ANXIETY

```{r}
# anxiety -----------------------------------------------------------------
# anxiety -> 33

# Dictionary words tagged with category 33.
words_anxiety <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 33)) %>%
  pull(x_1)

# Count each word before and after day 0, for all users (gender "t") and per
# gender, then compute after-minus-before differences, largest increase first.
f <- tidy_words %>%
  filter(word %in% words_anxiety) %>%
  count(days_tweet, word) %>%
  mutate(gender = "t") %>%
  bind_rows(tidy_words %>%
              filter(word %in% words_anxiety) %>%
              count(gender, days_tweet, word)) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# `f` is already sorted, so the first and last four rows are the largest
# increases and decreases. Unlike `top_n()`, this never returns extra rows on
# ties, so the table always matches the "4 + 4" description.
f_top <- head(f, 4)
f_bottom <- tail(f, 4)

# `distinct()` avoids repeating a word when fewer than eight are available.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()
```

## COMMON WORDS IN TWEETS BY WORDS CATEGORY:

```{r}
# Most frequent accompanying words, per gender, in tweets from day 0 onwards
# that score above zero on the family, female or anx category.
# NOTE(review): `token = "tweets"` may no longer be supported by recent
# tidytext releases -- confirm against the installed version.
neda_liwc_gender %>%
  filter(days_tweet >= 0) %>%
  select(id_tweet, id, gender, family, female, anx) %>%
  # One row per tweet and category, named `category` directly.
  pivot_longer(cols = c(family, female, anx), names_to = "category") %>%
  filter(value > 0) %>%
  # Bring the tweet text back in (joined on the shared columns).
  left_join(select(neda_liwc, id, id_tweet, text)) %>%
  # Remove tokens that end in an ellipsis character.
  mutate(text = str_remove_all(text, pattern = "[:graph:]+(…)")) %>%
  unnest_tokens(word, text, token = "tweets") %>%
  filter(
    # Stop words, with and without apostrophes, and the retweet marker.
    !word %in% stop_words$word,
    word != "rt",
    !word %in% str_remove_all(stop_words$word, "'"),
    # Words that define the category themselves, plus a few extra variants.
    !(category == "female" & word %in% c(words_female, "womens")),
    !(category == "family" & word %in% c(words_family, "parents", "brothers")),
    !(category == "anx" & word %in% words_anxiety),
    !is.na(word),
    word != "amp"
  ) %>%
  count(category, gender, word, sort = TRUE) %>%
  # Ten most frequent words per gender and category (ties are kept).
  group_by(gender, category) %>%
  top_n(10, wt = n) %>%
  ungroup() %>%
  pivot_wider(id_cols = c(category, word), names_from = gender,
              values_from = n) %>%
  mutate(category = str_to_title(category)) %>%
  gt::gt(groupname_col = "category",
         rowname_col = "word")
  
```

