Causal Impact

Number of total users:

neda_liwc %>% 
  select(id) %>% 
  distinct() %>% 
  nrow()
## [1] 1746
Word Category Relative Eff.(%) P Value
Female 17.418 0.001
Anx 7.566 0.002
Family 6.452 0.005
Money 5.991 0.002
Relig 5.209 0.045
Achiev 3.813 0.011
They 3.397 0.034
Negate 2.889 0.003
Health 2.526 0.004
Power 2.458 0.010
Negemo 2.066 0.037
Informal 1.116 0.038
Ipron -1.476 0.015
See -2.036 0.025
You -2.238 0.034
Differ -2.694 0.006
Posemo -3.277 0.001
Tentat -3.347 0.001
Shehe -7.042 0.020
Affiliation -7.167 0.003
Friend -16.459 0.041

CAUSAL IMPACT BY GENDER

Number of users by gender (1746 total):

neda_liwc_gender %>% 
  select(id, gender) %>% 
  distinct() %>% 
  count(gender)
## # A tibble: 3 x 2
##   gender     n
##   <chr>  <int>
## 1 f        762
## 2 m        313
## 3 u        671

Number of users in the baseline (2991 total):

baseline_liwc_gender %>% 
  select(id, gender) %>% 
  distinct() %>% 
  count(gender)
## # A tibble: 3 x 2
##   gender     n
##   <chr>  <int>
## 1 f        855
## 2 m        748
## 3 u       1388
baseline_liwc_gender %>% 
  select(-text, -created_at, -id_tweet, -name) %>% 
  group_by(gender, id, days_tweet) %>% 
  summarise_all(mean) %>% 
  ungroup() %>% 
  select(-id) %>% 
  group_by(gender, days_tweet) %>% 
  summarise_all(mean) %>% 
  ungroup() %>% 
  pivot_longer(cols = fun:filler, names_to = "categ", values_to = "values_baseline") -> ci_baseline

neda_liwc_gender %>% 
  select(-id_tweet, -name, -name_proc) %>% 
  group_by(gender, id, days_tweet) %>% 
  summarise_all(mean) %>% 
  ungroup() %>% 
  select(-id) %>% 
  group_by(gender, days_tweet) %>% 
  summarise_all(mean) %>% 
  ungroup() %>% 
  pivot_longer(fun:filler, names_to = "categ", 
               values_to = "values_neda") -> ci_neda_liwc 

pre_period <- c(1, 16)
post_period <- c(17, 31)

ci_baseline %>% 
  full_join(ci_neda_liwc) -> d_second

d_second %>% 
  # count(gender, days_tweet) %>% 
  # view()
  # view()
  # select(categ, values_neda, values_baseline) %>% 
  # nest(data = - categ) %>% 
  select(gender, categ, values_neda, values_baseline) %>%
  # filter(gender == "m") %>% view()
  group_by(categ, gender) %>% 
  nest() %>% 
  ungroup() %>% 
  select(gender, categ, data) %>% 
  mutate(mod = map(data, ~CausalImpact::CausalImpact(., 
                                                     pre_period,
                                                     post_period))) -> ci
  
ci %>% 
  mutate(summary_mod = map(mod, "summary")) %>% 
  filter(!map_lgl(summary_mod, is.null)) -> ci_resul 

ci_resul %>% 
  mutate(p = map(summary_mod, "p")) %>% 
  mutate(p = map_dbl(p, 1)) %>% 
  filter(categ %in% categories_in_gender) %>% 
  mutate(relative_effect = map(summary_mod, "RelEffect")) %>% 
  mutate(relative_effect = map_dbl(relative_effect, 2))-> sig_cat

# sig_cat %>% 
#   filter(categ %in% sel_cat) -> sig_cat

sig_cat %>% 
  arrange(gender, desc(relative_effect)) %>% 
  select(gender, categ, 
         second_relative_effect = relative_effect, p_second = p) -> second_ap

second_ap %>% 
  pivot_wider(id_cols = categ, 
              values_from = c(second_relative_effect, p_second),
              names_from = gender) %>% 
  mutate(categ = str_to_title(categ)) %>% 
  mutate_at(vars(starts_with("second")), ~.*100) %>% 
  mutate_if(is.numeric, ~round(., digits = 3)) %>% 
  mutate_at(vars(starts_with("second")), function(x){
    cell_spec(x, "html", color = spec_color(x), bold = T)
  }) %>% 
  kable("html", escape = F,
        align = "lrr",
        col.names = c("Word Category", "FEMALE Relative Eff.(%)",
                      "MALE Relative Eff.(%)",
                      "UNKNOWN Relative Eff.(%)",
                      "FEMALE P Value",
                      "MALE P Value",
                      "UNKNOWN P Value")) %>%  
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
Word Category FEMALE Relative Eff.(%) MALE Relative Eff.(%) UNKNOWN Relative Eff.(%) FEMALE P Value MALE P Value UNKNOWN P Value
Female 13.112 24.294 23.987 0.001 0.001 0.001
Anx 13.005 -2.704 10.903 0.001 0.283 0.001
Family 6.994 4.204 15.167 0.003 0.092 0.001
They 5.827 1.555 0.672 0.010 0.330 0.415
Achiev 5.153 -2.931 4.033 0.018 0.078 0.030
Money 4.866 14.027 4.088 0.002 0.002 0.104
Negate 4.449 -6.645 6.511 0.001 0.025 0.008
Health 4.246 9.044 -2.452 0.010 0.001 0.132
Negemo 3.94 -3.356 0.572 0.001 0.055 0.388
See 2.013 0.151 -7.928 0.146 0.485 0.001
Power 1.014 1.295 4.504 0.253 0.213 0.001
Relig 0.745 8.26 12.547 0.426 0.079 0.012
Informal -0.048 3.212 0.702 0.473 0.006 0.192
Ipron -0.233 -3.002 -1.231 0.401 0.026 0.111
Differ -0.539 -5.58 -0.83 0.340 0.006 0.224
Tentat -0.93 -3.098 -5.979 0.233 0.050 0.001
You -3.067 1.011 -2.898 0.014 0.249 0.046
Posemo -3.119 -7.538 -1.434 0.001 0.001 0.099
Affiliation -5.861 -8.878 -6.936 0.003 0.001 0.011
Shehe -7.875 -1.764 -4.549 0.038 0.384 0.096
Friend -11.06 -19.113 -19.873 0.135 0.002 0.054

Words inside categ:

The tables below contain the changes for the 4 most changed words (positive and negative) within each of the categories. The first column is the specific word. Changes_total is the difference between the “before” and “after” frequency of all users. Changes_f same as Changes_total but for female users, Changes_m for male users and Changes_u for unknown users.

library(tidyverse)

read_delim(here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
           delim = "\t", skip = 1, col_names = c("number", "name"), 
           n_max = 73) -> categories_name 

read_tsv(here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
         skip = 75, col_names = paste0(c("x"), c("_"), 1:11),
         guess_max = 6000, col_types = "ccccccccccc") %>% 
  mutate(x_1 = str_remove(x_1, "\\*")) -> words_dic

categories_name %>% 
  filter(name %in% c("female", "family", "anx", "shehe", "affiliation", "friend")) %>% 
  pull(number) -> number_top_categ

words_dic %>% 
  filter_at(vars(-x_1), any_vars(. %in% number_top_categ)) %>%  
  pull(x_1) -> words_cat


# neda_timelapse ----------------------------------------------------------

library(lubridate)

neda_hist <- read_rds(here::here("data", "NEDA_historical.rds")) %>% 
  distinct() %>% 
  filter(created_at_tweet >= ymd("2018-03-01")) %>% 
  mutate(text = str_to_lower(text),
         neda_related = str_detect(text, 
                                   pattern = "#nedawareness|#comeasyouare|@nedastaff")) 

gender_output <- read_tsv(here::here("data", "gender_extractor", 
                                     "neda_liwc_gender_output.tsv"),
                          col_names = c("id", "name", "name_proc", "gender"))

first_tweet <- neda_hist %>% 
  select(created_at_tweet, text, id, id_tweet, neda_related) %>% 
  filter(neda_related) %>% 
  arrange(created_at_tweet) %>% 
  group_by(id) %>% 
  slice(1) %>% 
  ungroup() %>% 
  select(cero_date = created_at_tweet, id) %>% 
  filter(cero_date >= ymd("2019-01-01"))

neda_change <- neda_hist %>% 
  select(id_tweet, text, created_at_tweet, id) %>% 
  inner_join(first_tweet) %>% 
  mutate(days_tweet = interval(start = cero_date, end = created_at_tweet),
         days_tweet = round(time_length(days_tweet, unit = "days"))) %>% 
  select(-cero_date) %>% 
  filter(days_tweet >= -15, days_tweet <= 15)

neda_timelapse <- neda_change %>%
  count(id, before_after = sign(days_tweet), sort = T) %>% 
  mutate(before_after = case_when(before_after == -1 ~ "before",
                                  before_after == 1 ~ "after",
                                  TRUE ~ "cero")) %>% 
  pivot_wider(values_from = n, names_from = before_after) %>% 
  filter(before >= 15 & after >= 15) %>% 
  select(-cero) %>% 
  semi_join(x = neda_change, y = .)

neda_timelapse %>% 
  inner_join(gender_output) -> neda_timelapse

library(tidytext)

replace_reg1 <- "https://t.co/[A-Za-z]\\d]+|"
replace_reg2 <- "https://t.co/[A-Za-z]\\d]+|&amp;|&lt;|&gt;|RT|https"
replace_reg <- paste0(replace_reg1, replace_reg2)
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

tidy_tweets <- neda_timelapse %>%
  mutate(text = str_to_lower(text)) %>% 
  mutate(text = str_replace_all(text, replace_reg, "")) %>% 
  unnest_tokens(word, text, token = "regex", pattern = unnest_reg)

tidy_tweets %>% 
  filter(word %in% words_cat) %>% 
  mutate(days_tweet = case_when(days_tweet < 0 ~ "before",
                                days_tweet > 0 ~ "after",
                                TRUE ~ "cero")) %>% 
  filter(days_tweet != "cero") -> tidy_words

neda_liwc <- read_rds(here::here("data", "neda_liwc.rds")) %>% 
  select(-created_at_tweet) %>% 
  filter(abs(days_tweet) <= 15)

FEMALE CATEGORY

# female ------------------------------------------------------------------
# female -> 43

words_dic %>% 
  filter_at(vars(-x_1), any_vars(. == 43)) %>% 
  pull(x_1) -> words_female

tidy_words %>% 
  filter(word %in% words_female) %>% 
  count(days_tweet, word) %>% 
  mutate(gender = "t") %>% 
  bind_rows(tidy_words %>% 
              filter(word %in% words_female) %>% 
              count(gender, days_tweet, word)) %>% 
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>% 
  mutate_if(is.numeric, ~replace_na(., 0)) %>% 
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>% 
  select(word, starts_with("changes")) %>% 
  arrange(desc(changes_total)) -> f 

f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

f_top %>% 
  bind_rows(f_bottom) %>% 
  gt::gt()
word changes_total changes_f changes_m changes_u
women 2117 903 403 811
her 320 251 -18 87
she 228 296 -30 -38
woman 207 90 37 80
gf -30 -20 -8 -2
wife -78 -37 -43 2
queen -96 1 -32 -65
lady -158 -115 -22 -21

FAMILY

words_dic %>% 
  filter_at(vars(-x_1), any_vars(. == 41)) %>% 
  pull(x_1) -> words_family

tidy_words %>% 
  filter(word %in% words_family) %>% 
  count(days_tweet, word) %>% 
  mutate(gender = "t") %>% 
  bind_rows(tidy_words %>% 
              filter(word %in% words_family) %>% 
              count(gender, days_tweet, word)) %>% 
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>% 
  mutate_if(is.numeric, ~replace_na(., 0)) %>% 
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>% 
  select(word, starts_with("changes")) %>% 
  arrange(desc(changes_total))  -> f 

f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

f_top %>% 
  bind_rows(f_bottom) %>% 
  gt::gt()
word changes_total changes_f changes_m changes_u
ma 132 52 49 31
family 122 97 11 14
daughter 115 36 26 53
families 88 47 21 20
mama -19 -24 -2 7
pa -19 -10 7 -16
daddy -23 -10 -6 -7
bro -63 7 30 -100
wife -78 -37 -43 2

ANXIETY

# anxiety -----------------------------------------------------------------
# anxiety -> 33

words_dic %>% 
  filter_at(vars(-x_1), any_vars(. == 33)) %>% 
  pull(x_1) -> words_anxiety

tidy_words %>% 
  filter(word %in% words_anxiety) %>% 
  count(days_tweet, word) %>% 
  mutate(gender = "t") %>% 
  bind_rows(tidy_words %>% 
              filter(word %in% words_anxiety) %>% 
              count(gender, days_tweet, word)) %>% 
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>% 
  mutate_if(is.numeric, ~replace_na(., 0)) %>% 
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>% 
  select(word, starts_with("changes")) %>% 
  arrange(desc(changes_total))  -> f 

f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

f_top %>% 
  bind_rows(f_bottom) %>% 
  gt::gt()
word changes_total changes_f changes_m changes_u
risk 190 16 117 57
stress 78 40 12 26
upset 73 32 13 28
worried 70 33 14 23
confuse -10 -6 1 -5
horrible -11 -5 3 -9
doubt -21 -11 -1 -9
scared -31 16 -13 -34

COMMON WORDS IN TWEETS BY WORDS CATEGORY:

neda_liwc_gender %>% 
  filter(days_tweet >= 0) %>% 
  select(id_tweet, id, gender, family, female, anx) %>% 
  pivot_longer(cols = family:anx) %>% 
  filter(value > 0) %>% 
  left_join(neda_liwc %>% 
              select(id, id_tweet, text)) %>%  
  rename("category" = name) %>% 
  mutate(text = str_remove_all(text, pattern = "[:graph:]+(…)")) %>% 
  unnest_tokens(word, text, token = "tweets") %>%  
  filter(!word %in% stop_words$word,
         !word %in% c("de", 4, "da", "la", "en", "le", "los", 3),
         word != "rt",
         !word %in% str_remove_all(stop_words$word, "'")) %>% 
  mutate(word = case_when(category == "female" & word %in% c(words_female,
                                                             "womens") ~ NA_character_,
                          category == "family" & word %in% c(words_family,
                                                             "parents",
                                                             "brothers")~ NA_character_,
                          category == "anx" & word %in% words_anxiety ~ NA_character_,
                          TRUE ~ word)) %>% 
  filter(!is.na(word),
         word != "amp") %>% 
  count(category, gender, word, sort = T) %>%
  group_by(gender, category) %>% 
  top_n(10, wt = n) %>% 
  ungroup() %>% 
  pivot_wider(id_cols = c(category, word), names_from = gender, values_from = n) %>% 
  mutate(category = str_to_title(category)) %>% 
  gt::gt(groupname_col = "category",
         rowname_col = "word")   
f u m
Female
day 521 453 232
love 410 276 135
happy 333 340 145
time 311 219 123
international 281 245 130
people 271 210 91
black 239 202 99
notre NA 227 NA
life 194 146 97
white 184 NA NA
#internationalwomensday 165 180 NA
amazing NA NA 107
world NA NA 105
Anx
love 138 362 152
people 351 287 105
struggling 243 218 NA
feel 168 124 NA
time 159 NA NA
struggle 158 NA NA
@swampmusicinfo NA 157 70
mental 152 123 NA
@laurarjacobs NA 148 NA
laura NA 145 65
jacobs NA 135 NA
life 124 NA NA
depression 121 NA NA
eating 119 NA NA
ft NA 117 NA
sixty NA NA 92
$safe NA NA 90
cse NA NA 90
solutions NA NA 90
news NA NA 86
@financialbuzz NA NA 84
Family
love 258 160 78
day 219 148 90
people 203 167 54
kids 202 153 NA
time 197 126 74
apologize NA 170 NA
ur NA 170 NA
children 157 159 NA
friends 142 NA 63
women 136 NA NA
child 128 NA NA
follow NA 126 NA
pregnant 123 NA NA
half NA 120 NA
life NA 120 86
fathersrightshq NA NA 52
happy NA NA 49
married NA NA 45
school NA NA 45
---
title: "Causal Impact" 
clean: true
output:
  bookdown::html_document2:
    number_sections: false
    code_download: true
    code_folding: hide
    self_contained: true
    toc: true
    toc_float: false
---

```{r setup, include=FALSE}
# Attach the packages this report relies on.
xfun::pkg_attach("tidyverse", "lubridate", "kableExtra")

# Chunk defaults: echo the code, but keep warnings and messages out of the
# rendered output.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)

# Use the linedraw theme for every ggplot2 plot.
theme_set(theme_linedraw())
```

## Causal Impact 

```{r echo=FALSE}
# LIWC scores of the baseline data set.
baseline_liwc <- here::here("data", "baseline_liwc.rds") %>%
  read_rds()

# LIWC scores of the NEDA data set, limited to the window of 15 days on either
# side of day 0; the raw text and timestamp are not needed in this section.
neda_liwc <- here::here("data", "neda_liwc.rds") %>%
  read_rds() %>%
  filter(abs(days_tweet) <= 15) %>%
  select(-text, -created_at_tweet)

# LIWC categories that are allowed to appear in the result tables.
sel_cat <- c(
  "i", "we", "you", "shehe", "they", "ipron", "negate", "compare",
  "posemo", "negemo", "anx", "anger", "sad", "social", "family", "friend",
  "female", "male", "insight", "cause", "discrep", "tentat", "certain",
  "differ", "see", "hear", "feel", "body", "health", "sexual", "ingest",
  "affiliation", "achiev", "power", "reward", "risk", "focuspast",
  "focuspresent", "focusfuture", "relativ", "work", "leisure", "home",
  "money", "relig", "death", "informal", "swear", "assent", "nonflu",
  "filler"
)
```

Number of total users:
```{r}
# Count the distinct user ids present in the NEDA data.
neda_liwc %>%
  distinct(id) %>%
  nrow()
```

```{r echo=FALSE}
# Baseline series: average every LIWC category per user and day first, then
# across users, so each user contributes equally whatever their tweet volume.
ci_baseline <- baseline_liwc %>%
  select(-text, -created_at, -id_tweet) %>%
  group_by(days_tweet, id) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(cols = fun:filler, names_to = "categ",
               values_to = "values_baseline")

# NEDA series, built with the same two-step averaging.
ci_neda_liwc <- neda_liwc %>%
  select(-id_tweet) %>%
  group_by(days_tweet, id) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(-days_tweet, names_to = "categ", values_to = "values_neda")

# The models receive plain data frames, so the periods are row positions:
# rows 1-16 are the pre-period and rows 17-31 the post-period.
# NOTE(review): this assumes one row per day from -15 to 15 for every
# category -- verify that no day is missing.
pre_period <- c(1, 16)
post_period <- c(17, 31)

d_second <- ci_baseline %>%
  inner_join(ci_neda_liwc, by = c("days_tweet", "categ"))

# One model per category. The response (values_neda) must be the first column
# and the baseline series acts as the covariate. Rows are sorted by day
# explicitly because the periods above are row indices.
ci <- d_second %>%
  arrange(days_tweet) %>%
  select(categ, values_neda, values_baseline) %>%
  nest(data = -categ) %>%
  mutate(mod = map(data, ~CausalImpact::CausalImpact(., pre_period,
                                                     post_period)))

# Keep only the models that produced a summary table.
ci_resul <- ci %>%
  mutate(summary_mod = map(mod, "summary")) %>%
  filter(!map_lgl(summary_mod, is.null))

# p is read from the first summary row, the relative effect from the second.
# Keep significant categories that are also in the curated sel_cat list.
sig_cat <- ci_resul %>%
  mutate(p = map_dbl(summary_mod, ~ .x[["p"]][[1]])) %>%
  filter(p < 0.05) %>%
  mutate(relative_effect = map_dbl(summary_mod,
                                   ~ .x[["RelEffect"]][[2]])) %>%
  filter(categ %in% sel_cat)

# Reused by the gender section to show the same set of categories.
categories_in_gender <- sig_cat %>% pull(categ)

second_ap <- sig_cat %>%
  arrange(desc(relative_effect)) %>%
  select(categ, second_relative_effect = relative_effect, p_second = p)

# Result table: effect as a percentage, colour-coded by magnitude.
second_ap %>%
  mutate(categ = str_to_title(categ),
         second_relative_effect = round(second_relative_effect * 100,
                                        digits = 3),
         p_second = round(p_second, digits = 3)) %>%
  mutate(second_relative_effect = cell_spec(
    second_relative_effect, "html",
    color = spec_color(second_relative_effect), bold = TRUE
  )) %>%
  kable("html", escape = FALSE,
        align = "lrr",
        col.names = c("Word Category", "Relative Eff.(%)",
                      "P Value")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
```

## CAUSAL IMPACT BY GENDER

```{r echo=FALSE}
# Baseline LIWC scores with a gender label per user.
baseline_liwc_gender <- here::here("data", "baseline_liwc_gender.rds") %>%
  read_rds()

# NEDA LIWC scores with a gender label per user, limited to the window of
# 15 days on either side of day 0. All other columns are kept.
neda_liwc_gender <- here::here("data", "neda_liwc_gender.rds") %>%
  read_rds() %>%
  filter(abs(days_tweet) <= 15)

# LIWC categories that are allowed to appear in the result tables.
sel_cat <- c(
  "i", "we", "you", "shehe", "they", "ipron", "negate", "compare",
  "posemo", "negemo", "anx", "anger", "sad", "social", "family", "friend",
  "female", "male", "insight", "cause", "discrep", "tentat", "certain",
  "differ", "see", "hear", "feel", "body", "health", "sexual", "ingest",
  "affiliation", "achiev", "power", "reward", "risk", "focuspast",
  "focuspresent", "focusfuture", "relativ", "work", "leisure", "home",
  "money", "relig", "death", "informal", "swear", "assent", "nonflu",
  "filler"
)
```

Number of users by gender (1746 total):
```{r}
# One row per (user, gender) pair, then tally the users in each gender.
neda_liwc_gender %>%
  distinct(id, gender) %>%
  count(gender)
```

Number of users in the baseline (2991 total):
```{r}
# One row per (user, gender) pair, then tally the users in each gender.
baseline_liwc_gender %>%
  distinct(id, gender) %>%
  count(gender)
```

```{r }
# Baseline series per gender: average every LIWC category per user and day
# first, then across users, so each user contributes equally.
ci_baseline <- baseline_liwc_gender %>%
  select(-text, -created_at, -id_tweet, -name) %>%
  group_by(gender, id, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(gender, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(cols = fun:filler, names_to = "categ",
               values_to = "values_baseline")

# NEDA series per gender, built with the same two-step averaging.
ci_neda_liwc <- neda_liwc_gender %>%
  select(-id_tweet, -name, -name_proc) %>%
  group_by(gender, id, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(gender, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(fun:filler, names_to = "categ",
               values_to = "values_neda")

# The models receive plain data frames, so the periods are row positions:
# rows 1-16 are the pre-period and rows 17-31 the post-period.
# NOTE(review): this assumes one row per day from -15 to 15 for every
# gender/category pair -- verify that no day is missing.
pre_period <- c(1, 16)
post_period <- c(17, 31)

d_second <- ci_baseline %>%
  full_join(ci_neda_liwc, by = c("gender", "days_tweet", "categ"))

# One model per (category, gender). The response (values_neda) must be the
# first column and the baseline series acts as the covariate. A full join
# appends unmatched rows at the end, so the rows are sorted by day explicitly
# to keep every series in time order before it is nested.
ci <- d_second %>%
  arrange(days_tweet) %>%
  select(gender, categ, values_neda, values_baseline) %>%
  group_by(categ, gender) %>%
  nest() %>%
  ungroup() %>%
  select(gender, categ, data) %>%
  mutate(mod = map(data, ~CausalImpact::CausalImpact(.,
                                                     pre_period,
                                                     post_period)))

# Keep only the models that produced a summary table.
ci_resul <- ci %>%
  mutate(summary_mod = map(mod, "summary")) %>%
  filter(!map_lgl(summary_mod, is.null))

# p is read from the first summary row, the relative effect from the second.
# No significance filter here: the categories shown are the ones stored in
# categories_in_gender by the overall analysis.
sig_cat <- ci_resul %>%
  mutate(p = map_dbl(summary_mod, ~ .x[["p"]][[1]])) %>%
  filter(categ %in% categories_in_gender) %>%
  mutate(relative_effect = map_dbl(summary_mod,
                                   ~ .x[["RelEffect"]][[2]]))

second_ap <- sig_cat %>%
  arrange(gender, desc(relative_effect)) %>%
  select(gender, categ,
         second_relative_effect = relative_effect, p_second = p)

# Wide table: one row per category, effect and p-value columns per gender.
second_ap %>%
  pivot_wider(id_cols = categ,
              values_from = c(second_relative_effect, p_second),
              names_from = gender) %>%
  # Pin the column order so it always matches the hard-coded headers below.
  select(categ,
         second_relative_effect_f, second_relative_effect_m,
         second_relative_effect_u,
         p_second_f, p_second_m, p_second_u) %>%
  mutate(categ = str_to_title(categ)) %>%
  mutate_at(vars(starts_with("second")), ~ . * 100) %>%
  mutate_if(is.numeric, ~ round(., digits = 3)) %>%
  # Colour scale is computed separately for each effect column.
  mutate_at(vars(starts_with("second")), function(x) {
    cell_spec(x, "html", color = spec_color(x), bold = TRUE)
  }) %>%
  kable("html", escape = FALSE,
        # One alignment letter per column (7 columns).
        align = "lrrrrrr",
        col.names = c("Word Category", "FEMALE Relative Eff.(%)",
                      "MALE Relative Eff.(%)",
                      "UNKNOWN Relative Eff.(%)",
                      "FEMALE P Value",
                      "MALE P Value",
                      "UNKNOWN P Value")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
```

## Words inside categ:

The tables below show, for each category, the four words whose use increased the most and the four whose use decreased the most (ties can add extra rows). The first column is the word. `changes_total` is the number of times the word was used "after" minus the number of times it was used "before" the user's first NEDA-related tweet, counted over all users. `changes_f`, `changes_m` and `changes_u` are the same difference for female, male and unknown-gender users, respectively.

```{r}
library(tidyverse)
library(lubridate)
library(tidytext)

# LIWC dictionary ----------------------------------------------------------

liwc_dic_path <- here::here("data", "liwc", "LIWC2015_English_Flat.dic")

# Header part of the .dic file: category number and category name.
categories_name <- read_delim(liwc_dic_path,
                              delim = "\t", skip = 1,
                              col_names = c("number", "name"),
                              n_max = 73)

# Word part of the .dic file: the word (x_1) followed by up to ten category
# numbers (x_2 .. x_11), all read as character.
# NOTE(review): the first "*" of each entry is stripped (presumably the LIWC
# wildcard marker), so such an entry only matches tokens identical to the
# remaining stem -- confirm this is intended.
words_dic <- read_tsv(liwc_dic_path,
                      skip = 75, col_names = paste0("x_", 1:11),
                      guess_max = 6000, col_types = "ccccccccccc") %>%
  mutate(x_1 = str_remove(x_1, "\\*"))

# Numbers of the categories inspected word by word.
number_top_categ <- categories_name %>%
  filter(name %in% c("female", "family", "anx", "shehe", "affiliation",
                     "friend")) %>%
  pull(number)

# Every dictionary word that belongs to at least one of those categories.
words_cat <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. %in% number_top_categ)) %>%
  pull(x_1)


# neda_timelapse ----------------------------------------------------------

# Tweets since 2018-03-01, lower-cased, with a flag for NEDA-related ones.
neda_hist <- read_rds(here::here("data", "NEDA_historical.rds")) %>%
  distinct() %>%
  filter(created_at_tweet >= ymd("2018-03-01")) %>%
  mutate(text = str_to_lower(text),
         neda_related = str_detect(
           text,
           pattern = "#nedawareness|#comeasyouare|@nedastaff"
         ))

gender_output <- read_tsv(here::here("data", "gender_extractor",
                                     "neda_liwc_gender_output.tsv"),
                          col_names = c("id", "name", "name_proc", "gender"))

# Day zero per user: their earliest NEDA-related tweet, kept only when it was
# posted on or after 2019-01-01.
first_tweet <- neda_hist %>%
  select(created_at_tweet, text, id, id_tweet, neda_related) %>%
  filter(neda_related) %>%
  arrange(created_at_tweet) %>%
  group_by(id) %>%
  slice(1) %>%
  ungroup() %>%
  select(cero_date = created_at_tweet, id) %>%
  filter(cero_date >= ymd("2019-01-01"))

# Tweets within 15 days of each user's day zero; days_tweet is the rounded
# number of days between day zero and the tweet.
neda_change <- neda_hist %>%
  select(id_tweet, text, created_at_tweet, id) %>%
  inner_join(first_tweet, by = "id") %>%
  mutate(days_tweet = interval(start = cero_date, end = created_at_tweet),
         days_tweet = round(time_length(days_tweet, unit = "days"))) %>%
  select(-cero_date) %>%
  filter(days_tweet >= -15, days_tweet <= 15)

# Users with at least 15 tweets before and at least 15 after day zero. Only
# the id is kept, so this no longer fails when nobody tweeted on day zero
# (no "cero" column).
active_users <- neda_change %>%
  count(id, before_after = sign(days_tweet), sort = TRUE) %>%
  mutate(before_after = case_when(before_after == -1 ~ "before",
                                  before_after == 1 ~ "after",
                                  TRUE ~ "cero")) %>%
  pivot_wider(values_from = n, names_from = before_after) %>%
  filter(before >= 15 & after >= 15) %>%
  select(id)

# Tweets of those users, with the gender attributes attached.
neda_timelapse <- neda_change %>%
  semi_join(active_users, by = "id") %>%
  inner_join(gender_output, by = "id")

# Patterns removed before tokenising. The character class used to be written
# "[A-Za-z]\\d]+", which never matched a real t.co link, so only the "https"
# prefix was stripped and the rest of the link was tokenised.
# The text is already lower-cased here, so the "RT" alternative cannot match.
replace_reg1 <- "https://t.co/[A-Za-z\\d]+|"
replace_reg2 <- "https://t.co/[A-Za-z\\d]+|&amp;|&lt;|&gt;|RT|https"
replace_reg <- paste0(replace_reg1, replace_reg2)
# Split on anything that is not a letter, digit, "_", "#", "@" or an
# apostrophe followed by one of those characters.
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

# One row per token.
tidy_tweets <- neda_timelapse %>%
  mutate(text = str_to_lower(text)) %>%
  mutate(text = str_replace_all(text, replace_reg, "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = unnest_reg)

# Tokens that are dictionary words of the selected categories, labelled as
# posted before or after day zero; day-zero tokens are dropped.
tidy_words <- tidy_tweets %>%
  filter(word %in% words_cat) %>%
  mutate(days_tweet = case_when(days_tweet < 0 ~ "before",
                                days_tweet > 0 ~ "after",
                                TRUE ~ "cero")) %>%
  filter(days_tweet != "cero")

# Reload the LIWC scores, this time keeping the tweet text.
neda_liwc <- read_rds(here::here("data", "neda_liwc.rds")) %>%
  select(-created_at_tweet) %>%
  filter(abs(days_tweet) <= 15)

```

### FEMALE CATEGORY

```{r}
# female ------------------------------------------------------------------
# Category number 43 is "female" in the dictionary.

words_female <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 43)) %>%
  pull(x_1)

# Occurrences of female-category words.
female_hits <- tidy_words %>%
  filter(word %in% words_female)

# Counts per period and word over all users, labelled with gender "t" ...
female_counts_total <- female_hits %>%
  count(days_tweet, word) %>%
  mutate(gender = "t")

# ... and the same counts split by gender.
female_counts_gender <- female_hits %>%
  count(gender, days_tweet, word)

# One row per word with after-minus-before differences, largest first.
f <- bind_rows(female_counts_total, female_counts_gender) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases (ties are kept).
f_top <- top_n(f, 4, wt = changes_total)
f_bottom <- top_n(f, -4, wt = changes_total)

bind_rows(f_top, f_bottom) %>%
  gt::gt()
```

### FAMILY

```{r}
# family ------------------------------------------------------------------
# Category number 41 is used for the family word list.

words_family <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 41)) %>%
  pull(x_1)

# Occurrences of family-category words.
family_hits <- tidy_words %>%
  filter(word %in% words_family)

# Counts per period and word over all users, labelled with gender "t" ...
family_counts_total <- family_hits %>%
  count(days_tweet, word) %>%
  mutate(gender = "t")

# ... and the same counts split by gender.
family_counts_gender <- family_hits %>%
  count(gender, days_tweet, word)

# One row per word with after-minus-before differences, largest first.
f <- bind_rows(family_counts_total, family_counts_gender) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases (ties are kept).
f_top <- top_n(f, 4, wt = changes_total)
f_bottom <- top_n(f, -4, wt = changes_total)

bind_rows(f_top, f_bottom) %>%
  gt::gt()
```

### ANXIETY

```{r}
# anxiety -----------------------------------------------------------------
# Category number 33 is "anxiety" in the dictionary.

words_anxiety <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 33)) %>%
  pull(x_1)

# Occurrences of anxiety-category words.
anxiety_hits <- tidy_words %>%
  filter(word %in% words_anxiety)

# Counts per period and word over all users, labelled with gender "t" ...
anxiety_counts_total <- anxiety_hits %>%
  count(days_tweet, word) %>%
  mutate(gender = "t")

# ... and the same counts split by gender.
anxiety_counts_gender <- anxiety_hits %>%
  count(gender, days_tweet, word)

# One row per word with after-minus-before differences, largest first.
f <- bind_rows(anxiety_counts_total, anxiety_counts_gender) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases (ties are kept).
f_top <- top_n(f, 4, wt = changes_total)
f_bottom <- top_n(f, -4, wt = changes_total)

bind_rows(f_top, f_bottom) %>%
  gt::gt()
```

## COMMON WORDS IN TWEETS BY WORDS CATEGORY:

```{r}
# Tokens discarded in addition to the tidytext stop words.
noise_tokens <- c("de", "4", "da", "la", "en", "le", "los", "3")
# Stop words with apostrophes removed (e.g. "dont" as well as "don't").
stop_words_plain <- str_remove_all(stop_words$word, "'")

# One row per (tweet, category) for tweets from day 0 onwards that score
# above zero in family, female or anx, with the tweet text attached.
category_tweets <- neda_liwc_gender %>%
  filter(days_tweet >= 0) %>%
  select(id_tweet, id, gender, family, female, anx) %>%
  pivot_longer(cols = family:anx, names_to = "category") %>%
  filter(value > 0) %>%
  left_join(select(neda_liwc, id, id_tweet, text),
            by = c("id_tweet", "id"))

category_tweets %>%
  # Remove any run of visible characters ending in an ellipsis character
  # (presumably truncated words or links).
  mutate(text = str_remove_all(text, pattern = "[:graph:]+(…)")) %>%
  # NOTE(review): the "tweets" tokenizer is reported as removed in recent
  # tidytext releases -- confirm the installed version still provides it.
  unnest_tokens(word, text, token = "tweets") %>%
  filter(!word %in% stop_words$word,
         !word %in% noise_tokens,
         word != "rt",
         !word %in% stop_words_plain) %>%
  # Flag the words that define the category itself; the goal is to see what
  # else those tweets talk about.
  mutate(own_dictionary_word =
           (category == "female" &
              word %in% c(words_female, "womens")) |
           (category == "family" &
              word %in% c(words_family, "parents", "brothers")) |
           (category == "anx" & word %in% words_anxiety)) %>%
  filter(!own_dictionary_word,
         !is.na(word),
         word != "amp") %>%
  count(category, gender, word, sort = TRUE) %>%
  # Ten most frequent words per gender and category (ties are kept).
  group_by(gender, category) %>%
  top_n(10, wt = n) %>%
  ungroup() %>%
  pivot_wider(id_cols = c(category, word),
              names_from = gender, values_from = n) %>%
  mutate(category = str_to_title(category)) %>%
  gt::gt(groupname_col = "category",
         rowname_col = "word")
  
```

