Causal Impact

Number of total users:

neda_liwc %>% 
  select(id) %>% 
  distinct() %>% 
  nrow()
## [1] 431
Word Category Relative Eff.(%) P Value
Female 33.031 0.001
Risk 20.29 0.001
Relig 17.409 0.015
Anx 15.936 0.046
We 15.857 0.004
Money 10.6 0.002
Hear 9.708 0.002
Body 8.89 0.003
Nonflu 7.344 0.019
Power 6.675 0.001
Focusfuture 6.63 0.006
Relativ 6.452 0.001
Tentat 4.874 0.012
Focuspresent 4.403 0.001
Negemo 4.233 0.004
Work 3.925 0.022
Social 3.632 0.001
Informal 3.155 0.002
Focuspast 2.694 0.040
Posemo -2.59 0.007
Negate -3.469 0.048
See -15.408 0.001
Ingest -26.529 0.025
Friend -27.165 0.001
Filler -42.13 0.001

CAUSAL IMPACT BY GENDER

Number of users by gender (roughly 400 in total):

neda_liwc_gender %>% 
  select(id, gender) %>% 
  distinct() %>% 
  count(gender)
## # A tibble: 3 x 2
##   gender     n
##   <chr>  <int>
## 1 f        173
## 2 m         65
## 3 u        193

Number of users in the baseline, by gender:

baseline_liwc_gender %>% 
  select(id, gender) %>% 
  distinct() %>% 
  count(gender)
## # A tibble: 3 x 2
##   gender     n
##   <chr>  <int>
## 1 f       1699
## 2 m       2066
## 3 u       2978
baseline_liwc_gender %>% 
  select(-text, -created_at_tweet, -id_tweet, -name) %>% 
  group_by(gender, id, days_tweet) %>% 
  summarise_all(mean) %>% 
  ungroup() %>% 
  select(-id) %>% 
  group_by(gender, days_tweet) %>% 
  summarise_all(mean) %>% 
  ungroup() %>% 
  pivot_longer(cols = fun:filler, names_to = "categ", values_to = "values_baseline") -> ci_baseline

neda_liwc_gender %>% 
  select(-id_tweet, -created_at_tweet) %>% 
  group_by(gender, id, days_tweet) %>% 
  summarise_all(mean) %>% 
  ungroup() %>% 
  select(-id) %>% 
  group_by(gender, days_tweet) %>% 
  summarise_all(mean) %>% 
  ungroup() %>% 
  pivot_longer(fun:filler, names_to = "categ", 
               values_to = "values_neda") -> ci_neda_liwc 

pre_period <- c(1, 16)
post_period <- c(17, 31)

ci_baseline %>% 
  full_join(ci_neda_liwc) -> d_second

d_second %>% 
  # count(gender, days_tweet) %>% 
  # view()
  # view()
  # select(categ, values_neda, values_baseline) %>% 
  # nest(data = - categ) %>% 
  select(gender, categ, values_neda, values_baseline) %>%
  # filter(gender == "m") %>% view()
  group_by(categ, gender) %>% 
  nest() %>% 
  ungroup() %>% 
  select(gender, categ, data) %>% 
  mutate(mod = map(data, ~CausalImpact::CausalImpact(., 
                                                     pre_period,
                                                     post_period))) -> ci
  
ci %>% 
  mutate(summary_mod = map(mod, "summary")) %>% 
  filter(!map_lgl(summary_mod, is.null)) -> ci_resul 

ci_resul %>% 
  mutate(p = map(summary_mod, "p")) %>% 
  mutate(p = map_dbl(p, 1)) %>% 
  filter(categ %in% categories_in_gender) %>% 
  mutate(relative_effect = map(summary_mod, "RelEffect")) %>% 
  mutate(relative_effect = map_dbl(relative_effect, 2))-> sig_cat

# sig_cat %>% 
#   filter(categ %in% sel_cat) -> sig_cat

sig_cat %>% 
  arrange(gender, desc(relative_effect)) %>% 
  select(gender, categ, 
         second_relative_effect = relative_effect, p_second = p) -> second_ap

second_ap %>% 
  pivot_wider(id_cols = categ, 
              values_from = c(second_relative_effect, p_second),
              names_from = gender) %>% 
  mutate(categ = str_to_title(categ)) %>% 
  mutate_at(vars(starts_with("second")), ~.*100) %>% 
  mutate_if(is.numeric, ~round(., digits = 3)) %>% 
  mutate_at(vars(starts_with("second")), function(x){
    cell_spec(x, "html", color = spec_color(x), bold = T)
  }) %>% 
  kable("html", escape = F,
        align = "lrr",
        col.names = c("Word Category", "FEMALE Relative Eff.(%)",
                      "MALE Relative Eff.(%)",
                      "UNKNOWN Relative Eff.(%)",
                      "FEMALE P Value",
                      "MALE P Value",
                      "UNKNOWN P Value")) %>%  
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
Word Category FEMALE Relative Eff.(%) MALE Relative Eff.(%) UNKNOWN Relative Eff.(%) FEMALE P Value MALE P Value UNKNOWN P Value
Risk 27.713 7.938 10.948 0.001 0.174 0.074
Female 26.164 22.59 34.262 0.001 0.005 0.001
Relig 25.409 -2.293 15.585 0.014 0.393 0.102
We 17.588 -0.364 18.167 0.028 0.471 0.002
Hear 10.457 -3.575 10.76 0.003 0.255 0.003
Power 9.825 0.762 5.091 0.001 0.417 0.012
Anx 8.477 6.309 29.838 0.174 0.270 0.031
Money 8.209 31.643 -0.583 0.029 0.001 0.464
Tentat 7.838 2.283 3.036 0.011 0.262 0.141
Relativ 7.719 1.424 2.714 0.001 0.256 0.070
Informal 6.653 4.143 0.35 0.001 0.001 0.391
Work 6.207 0.102 2.455 0.016 0.477 0.190
Focuspast 5.91 9.87 -4.13 0.010 0.007 0.024
Social 4.541 4.411 3.713 0.001 0.002 0.003
Focuspresent 3.932 10.171 3.492 0.007 0.001 0.001
Focusfuture 2.705 9.076 13.746 0.267 0.002 0.001
Nonflu 2.614 15.021 8.292 0.326 0.042 0.079
Negemo 1.98 13.916 2.728 0.253 0.001 0.156
Body -0.127 31.309 15.016 0.494 0.001 0.001
Negate -6.244 1.931 -0.057 0.080 0.346 0.483
Posemo -7.802 -0.554 -0.266 0.001 0.434 0.423
Ingest -17.991 -27.943 -12.164 0.188 0.012 0.207
See -20.432 1.343 -17.405 0.001 0.419 0.001
Friend -34.369 -16.925 -24.181 0.001 0.092 0.008
Filler -40.55 -34.464 -52.18 0.002 0.007 0.001

Words inside categ:

The tables below show, for each category, the 4 words whose frequency increased the most and the 4 words whose frequency decreased the most. The first column is the word itself. changes_total is the “after” frequency minus the “before” frequency over all users; changes_f, changes_m and changes_u are the same difference restricted to female, male and unknown-gender users, respectively.

library(tidyverse)

read_delim(here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
           delim = "\t", skip = 1, col_names = c("number", "name"), 
           n_max = 73) -> categories_name 

read_tsv(here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
         skip = 75, col_names = paste0(c("x"), c("_"), 1:11),
         guess_max = 6000, col_types = "ccccccccccc") %>% 
  mutate(x_1 = str_remove(x_1, "\\*")) -> words_dic

categories_name %>% 
  filter(name %in% c("female", "family", "anx", "shehe", "affiliation", "friend")) %>% 
  pull(number) -> number_top_categ

words_dic %>% 
  filter_at(vars(-x_1), any_vars(. %in% number_top_categ)) %>%  
  pull(x_1) -> words_cat


# neda_timelapse ----------------------------------------------------------

library(lubridate)

# neda_hist <- read_rds(here::here("data", "NEDA_historical_twenty.rds")) %>% 
#   distinct() %>% 
#   filter(created_at_tweet >= ymd("2019-03-01")) %>% 
#   mutate(text = str_to_lower(text),
#          neda_related = str_detect(text, 
#                                    pattern = "#nedawareness|#comeasyouare|@nedastaff")) 

# gender_output <- read_tsv(here::here("data", "gender_extractor", 
#                                      "neda_liwc_gender_output.tsv"),
#                           col_names = c("id", "name", "name_proc", "gender"))

gender_output <- read_rds(here::here("data", "neda_liwc_gender_twenty.rds")) %>% 
  select(gender, id_tweet, id)
# first_tweet <- neda_hist %>% 
#   select(created_at_tweet, text, id, id_tweet, neda_related) %>% 
#   filter(neda_related) %>% 
#   arrange(created_at_tweet) %>% 
#   group_by(id) %>% 
#   slice(1) %>% 
#   ungroup() %>% 
#   select(cero_date = created_at_tweet, id) %>% 
#   filter(cero_date >= ymd("2019-01-01"))
# 
# neda_change <- neda_hist %>% 
#   select(id_tweet, text, created_at_tweet, id) %>% 
#   inner_join(first_tweet) %>% 
#   mutate(days_tweet = interval(start = cero_date, end = created_at_tweet),
#          days_tweet = round(time_length(days_tweet, unit = "days"))) %>% 
#   select(-cero_date) %>% 
#   filter(days_tweet >= -15, days_tweet <= 15)
# 
# neda_timelapse <- neda_change %>%
#   count(id, before_after = sign(days_tweet), sort = T) %>% 
#   mutate(before_after = case_when(before_after == -1 ~ "before",
#                                   before_after == 1 ~ "after",
#                                   TRUE ~ "cero")) %>% 
#   pivot_wider(values_from = n, names_from = before_after) %>% 
#   filter(before >= 15 & after >= 15) %>% 
#   select(-cero) %>% 
#   semi_join(x = neda_change, y = .)



read_rds(here::here("data", "neda_timelapse_twenty.rds")) %>% 
  inner_join(gender_output) -> neda_timelapse

library(tidytext)

replace_reg1 <- "https://t.co/[A-Za-z]\\d]+|"
replace_reg2 <- "https://t.co/[A-Za-z]\\d]+|&amp;|&lt;|&gt;|RT|https"
replace_reg <- paste0(replace_reg1, replace_reg2)
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

tidy_tweets <- neda_timelapse %>%
  mutate(text = str_to_lower(text)) %>% 
  mutate(text = str_replace_all(text, replace_reg, "")) %>% 
  unnest_tokens(word, text, token = "regex", pattern = unnest_reg)

tidy_tweets %>% 
  filter(word %in% words_cat) %>% 
  mutate(days_tweet = case_when(days_tweet < 0 ~ "before",
                                days_tweet > 0 ~ "after",
                                TRUE ~ "cero")) %>% 
  filter(days_tweet != "cero") -> tidy_words

neda_liwc <- read_rds(here::here("data", "neda_liwc_twenty.rds")) %>% 
  select(-created_at_tweet) %>% 
  filter(abs(days_tweet) <= 15)

FEMALE

# female ------------------------------------------------------------------
# female -> 43

words_dic %>% 
  filter_at(vars(-x_1), any_vars(. == 43)) %>% 
  pull(x_1) -> words_female

tidy_words %>% 
  filter(word %in% words_female) %>% 
  count(days_tweet, word) %>% 
  mutate(gender = "t") %>% 
  bind_rows(tidy_words %>% 
              filter(word %in% words_female) %>% 
              count(gender, days_tweet, word)) %>% 
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>% 
  mutate_if(is.numeric, ~replace_na(., 0)) %>% 
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>% 
  select(word, starts_with("changes")) %>% 
  arrange(desc(changes_total)) -> f 

f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

f_top %>% 
  bind_rows(f_bottom) %>% 
  gt::gt()
word changes_total changes_f changes_m changes_u
women 816 323 110 383
she 489 181 67 241
her 431 234 51 146
madam 233 39 2 192
girly -9 -4 -3 -2
witch -9 -10 -2 3
gal -10 -7 -2 -1
gals -14 -5 0 -9
lesbian -56 -24 -6 -26

RISK

# risk -> 85

words_dic %>% 
  filter_at(vars(-x_1), any_vars(. == 85)) %>% 
  pull(x_1) -> words_risk

tidy_words %>% 
  filter(word %in% words_risk) %>% 
  count(days_tweet, word) %>% 
  mutate(gender = "t") %>% 
  bind_rows(tidy_words %>% 
              filter(word %in% words_risk) %>% 
              count(gender, days_tweet, word)) %>% 
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>% 
  mutate_if(is.numeric, ~replace_na(., 0)) %>% 
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>% 
  select(word, starts_with("changes")) %>% 
  arrange(desc(changes_total)) -> f 

f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

f_top %>% 
  bind_rows(f_bottom) %>% 
  gt::gt()
word changes_total changes_f changes_m changes_u
avoid 92 39 19 34
risk 89 50 28 11
threat 51 32 7 12
alarm 11 4 2 5
dread 3 0 2 1
doubt 2 2 3 -3
inhibit 1 1 0 0
unsure -3 0 -2 -1

RELIGION

# relig -> 114

words_dic %>% 
  filter_at(vars(-x_1), any_vars(. == 114)) %>% 
  pull(x_1) -> words_relig

tidy_words %>% 
  filter(word %in% words_relig) %>% 
  count(days_tweet, word) %>% 
  mutate(gender = "t") %>% 
  bind_rows(tidy_words %>% 
              filter(word %in% words_relig) %>% 
              count(gender, days_tweet, word)) %>% 
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>% 
  mutate_if(is.numeric, ~replace_na(., 0)) %>% 
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>% 
  select(word, starts_with("changes")) %>% 
  arrange(desc(changes_total)) -> f 

f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

f_top %>% 
  bind_rows(f_bottom) %>% 
  gt::gt()
word changes_total changes_f changes_m changes_u
church 31 20 9 2
goddess 4 2 0 2
church 31 20 9 2
goddess 4 2 0 2

MONEY

# money -> 113

words_dic %>% 
  filter_at(vars(-x_1), any_vars(. == 113)) %>% 
  pull(x_1) -> words_money

tidy_words %>% 
  filter(word %in% words_money) %>% 
  count(days_tweet, word) %>% 
  mutate(gender = "t") %>% 
  bind_rows(tidy_words %>% 
              filter(word %in% words_money) %>% 
              count(gender, days_tweet, word)) %>% 
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>% 
  mutate_if(is.numeric, ~replace_na(., 0)) %>% 
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>% 
  select(word, starts_with("changes")) %>% 
  arrange(desc(changes_total)) -> f 

f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

f_top %>% 
  bind_rows(f_bottom) %>% 
  gt::gt()
word changes_total changes_f changes_m changes_u
donate 10 6 1 3
donation 10 0 6 4
donate 10 6 1 3
donation 10 0 6 4

BODY

# body -> 71

words_dic %>% 
  filter_at(vars(-x_1), any_vars(. == 114)) %>% 
  pull(x_1) -> words_body

tidy_words %>% 
  filter(word %in% words_body) %>% 
  count(days_tweet, word) %>% 
  mutate(gender = "t") %>% 
  bind_rows(tidy_words %>% 
              filter(word %in% words_body) %>% 
              count(gender, days_tweet, word)) %>% 
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>% 
  mutate_if(is.numeric, ~replace_na(., 0)) %>% 
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>% 
  select(word, starts_with("changes")) %>% 
  arrange(desc(changes_total)) -> f 

f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

f_top %>% 
  bind_rows(f_bottom) %>% 
  gt::gt()
word changes_total changes_f changes_m changes_u
church 31 20 9 2
goddess 4 2 0 2
church 31 20 9 2
goddess 4 2 0 2

COMMON WORDS IN TWEETS BY WORDS CATEGORY:

neda_liwc_gender %>% 
  filter(days_tweet >= 0) %>% 
  select(id_tweet, id, gender, female, risk, relig, body, money) %>% 
  pivot_longer(cols = female:money) %>% 
  filter(value > 0) %>% 
  left_join(neda_liwc %>% 
              select(id, id_tweet, text)) %>%  
  rename("category" = name) %>% 
  mutate(text = str_remove_all(text, pattern = "[:graph:]+(…)")) %>% 
  unnest_tokens(word, text, token = "tweets") %>%  
  filter(!word %in% stop_words$word,
         !word %in% c("de", 4, "da", "la", "en", "le", "los", 3),
         word != "rt",
         !word %in% str_remove_all(stop_words$word, "'")) %>% 
  # mutate(word = case_when(category == "female" & word %in% c(words_female,
  #                                                            "womens") ~ NA_character_,
  #                         category == "family" & word %in% c(words_family,
  #                                                            "parents",
  #                                                            "brothers")~ NA_character_,
  #                         category == "anx" & word %in% words_anxiety ~ NA_character_,
  #                         TRUE ~ word)) %>% 
  filter(!is.na(word),
         word != "amp") %>% 
  count(category, gender, word, sort = T) %>%
  group_by(gender, category) %>% 
  top_n(10, wt = n) %>% 
  ungroup() %>% 
  pivot_wider(id_cols = c(category, word), names_from = gender, values_from = n) %>% 
  mutate(category = str_to_title(category)) %>% 
  gt::gt(groupname_col = "category",
         rowname_col = "word")   
u f m
Female
women 569 448 194
girl 491 251 84
sharlene 483 187 NA
love 274 183 60
na 265 NA NA
woman 229 247 104
day 229 192 51
madam 229 NA NA
girls 211 174 61
happy 197 149 NA
mom NA 155 NA
womens NA 142 52
people NA NA 72
trans NA NA 68
time NA NA 50
Risk
stop 514 349 205
bad 403 260 167
people 247 185 128
safe 237 127 59
coronavirus 121 192 58
wrong 174 86 71
crisis NA 148 98
health NA 138 59
protect 127 87 NA
lose 122 NA NA
stay 119 NA NA
trump NA 109 NA
worst 105 NA NA
time NA NA 61
security NA NA 58
Body
shit 444 207 132
heart 302 158 94
hands 249 259 96
body 239 151 64
people 218 141 91
sleep 210 NA NA
ass 186 124 53
hand 163 147 72
love 158 NA NA
wash NA 145 52
time 144 NA NA
head NA 108 63
coronavirus NA 88 NA
eyes NA NA 52
Relig
god 334 270 369
lord NA 37 131
@youversion NA NA 130
jesus NA 36 102
holy 100 55 NA
love 94 66 72
life NA NA 79
angel 75 29 NA
soul 74 51 NA
bless 70 29 NA
people 62 50 42
shit 60 38 NA
sin 59 39 52
day 55 NA NA
@neilvermillion NA NA 48
@janayellis NA NA 43
minister NA 29 NA
Money
free 203 221 87
people 181 193 96
money 166 135 85
coronavirus 99 146 57
account 135 NA 46
pay 91 130 NA
worth 123 92 NA
time 117 122 57
sharlene 100 NA NA
paid NA 96 NA
buy 87 93 50
bill NA 91 NA
marketing NA NA 87
sales NA NA 70
business NA NA 46
---
title: "Causal Impact" 
clean: true
output:
  bookdown::html_document2:
    number_sections: false
    code_download: true
    code_folding: hide
    self_contained: true
    toc: true
    toc_float: false
---

```{r setup, include=FALSE}
# Chunk defaults: echo the code, but keep warnings and messages out of the
# rendered report.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)

# Attach the packages whose functions are called unqualified below.
xfun::pkg_attach(c("tidyverse", "lubridate", "kableExtra"))

# Default ggplot2 theme for the document.
theme_set(theme_linedraw())
```

## Causal Impact 

```{r echo=FALSE}
# LIWC scores of the baseline data set.
baseline_liwc <- here::here("data", "baseline_liwc_twenty.rds") %>%
  read_rds()

# LIWC scores of the NEDA data set, restricted to the 15 days on either side
# of day 0; the raw text and timestamp columns are not needed here.
neda_liwc <- here::here("data", "neda_liwc_twenty.rds") %>%
  read_rds() %>%
  filter(abs(days_tweet) <= 15) %>%
  select(-text, -created_at_tweet)

# LIWC categories that are reported in the tables.
sel_cat <- c(
  "i", "we", "you", "shehe", "they", "ipron", "negate", "compare",
  "posemo", "negemo", "anx", "anger", "sad", "social", "family", "friend",
  "female", "male", "insight", "cause", "discrep", "tentat", "certain",
  "differ", "see", "hear", "feel", "body", "health", "sexual", "ingest",
  "affiliation", "achiev", "power", "reward", "risk", "focuspast",
  "focuspresent", "focusfuture", "relativ", "work", "leisure", "home",
  "money", "relig", "death", "informal", "swear", "assent", "nonflu",
  "filler"
)
```

Number of total users:
```{r}
# One row per distinct user id; the row count is the number of users.
neda_liwc %>%
  distinct(id) %>%
  nrow()
```

```{r echo=FALSE}
# Collapse each data set to one value per day and LIWC category: tweets are
# first averaged within user and day, then across users, so every user
# contributes equally to a day's mean.
ci_baseline <- baseline_liwc %>%
  select(-text, -created_at_tweet, -id_tweet) %>%
  group_by(days_tweet, id) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(cols = fun:filler, names_to = "categ",
               values_to = "values_baseline")

ci_neda_liwc <- neda_liwc %>%
  select(-id_tweet) %>%
  group_by(days_tweet, id) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(-days_tweet, names_to = "categ", values_to = "values_neda")

# Row positions (not day numbers) of the pre and post windows within each
# per-category series.
# NOTE(review): this assumes 31 rows per category, one per day from -15 to 15,
# so rows 1-16 are days -15..0 and rows 17-31 are days 1..15 -- confirm that
# no day is missing in either data set.
pre_period <- c(1, 16)
post_period <- c(17, 31)

# Join keys are spelled out so the pairing of the two series does not depend
# on whichever columns happen to share a name.
d_second <- ci_baseline %>%
  inner_join(ci_neda_liwc, by = c("days_tweet", "categ"))

# One model per category. The first data column (values_neda) is the response
# and values_baseline the control series. Rows are sorted by day explicitly
# because the windows above are positional.
# NOTE(review): CausalImpact estimates come from posterior sampling; without
# a set.seed() call the figures may differ slightly between knits.
ci <- d_second %>%
  arrange(days_tweet) %>%
  select(categ, values_neda, values_baseline) %>%
  nest(data = -categ) %>%
  mutate(mod = map(data, ~CausalImpact::CausalImpact(., pre_period, post_period)))

# Keep only the models that produced a summary table.
ci_resul <- ci %>%
  mutate(summary_mod = map(mod, "summary")) %>%
  filter(!map_lgl(summary_mod, is.null))

# p: first entry of the summary's p column; relative_effect: second entry of
# its RelEffect column. Only significant categories listed in sel_cat stay.
sig_cat <- ci_resul %>%
  mutate(p = map(summary_mod, "p")) %>%
  mutate(p = map_dbl(p, 1)) %>%
  filter(p < 0.05) %>%
  mutate(relative_effect = map(summary_mod, "RelEffect")) %>%
  mutate(relative_effect = map_dbl(relative_effect, 2)) %>%
  filter(categ %in% sel_cat)

# Categories that are significant overall; the by-gender section reuses them.
categories_in_gender <- sig_cat %>% pull(categ)

second_ap <- sig_cat %>%
  arrange(desc(relative_effect)) %>%
  select(categ, second_relative_effect = relative_effect, p_second = p)

# Render the table: effect as a percentage, 3 decimals, colour-coded cells.
second_ap %>%
  mutate(categ = str_to_title(categ)) %>%
  mutate(second_relative_effect = second_relative_effect * 100) %>%
  mutate_if(is.numeric, ~round(., digits = 3)) %>%
  mutate(second_relative_effect = cell_spec(
    second_relative_effect, "html",
    color = spec_color(second_relative_effect), bold = TRUE
  )) %>%
  kable("html", escape = FALSE,
        align = "lrr",
        col.names = c("Word Category", "Relative Eff.(%)",
                      "P Value")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
```

## CAUSAL IMPACT BY GENDER

```{r echo=FALSE}
# Baseline LIWC scores with a gender label per user.
baseline_liwc_gender <- here::here("data", "baseline_liwc_gender_twenty.rds") %>%
  read_rds()

# NEDA LIWC scores with gender, limited to days -15..15. All columns are kept
# because later chunks still use the tweet identifiers.
neda_liwc_gender <- here::here("data", "neda_liwc_gender_twenty.rds") %>%
  read_rds() %>%
  filter(days_tweet >= -15, days_tweet <= 15)

# LIWC categories that are reported in the tables.
sel_cat <- c(
  "i", "we", "you", "shehe", "they", "ipron", "negate", "compare",
  "posemo", "negemo", "anx", "anger", "sad", "social", "family", "friend",
  "female", "male", "insight", "cause", "discrep", "tentat", "certain",
  "differ", "see", "hear", "feel", "body", "health", "sexual", "ingest",
  "affiliation", "achiev", "power", "reward", "risk", "focuspast",
  "focuspresent", "focusfuture", "relativ", "work", "leisure", "home",
  "money", "relig", "death", "informal", "swear", "assent", "nonflu",
  "filler"
)
```

Number of users by gender (roughly 400 in total):
```{r}
# Each user counted once, then tallied per gender label.
neda_liwc_gender %>%
  distinct(id, gender) %>%
  count(gender)
```

Number of users in the baseline, by gender:
```{r}
# Each baseline user counted once, then tallied per gender label.
baseline_liwc_gender %>%
  distinct(id, gender) %>%
  count(gender)
```

```{r }
# Same aggregation as the overall analysis, but kept separate per gender:
# tweets are averaged within user and day, then across users of one gender.
ci_baseline <- baseline_liwc_gender %>%
  select(-text, -created_at_tweet, -id_tweet, -name) %>%
  group_by(gender, id, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(gender, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(cols = fun:filler, names_to = "categ",
               values_to = "values_baseline")

ci_neda_liwc <- neda_liwc_gender %>%
  select(-id_tweet, -created_at_tweet) %>%
  group_by(gender, id, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(gender, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(fun:filler, names_to = "categ",
               values_to = "values_neda")

# Row positions (not day numbers) of the pre and post windows within each
# series.
# NOTE(review): this assumes 31 rows per gender and category, one per day from
# -15 to 15 -- confirm that no day is missing for any gender.
pre_period <- c(1, 16)
post_period <- c(17, 31)

# Join keys are spelled out so the pairing of the two series does not depend
# on whichever columns happen to share a name.
d_second <- ci_baseline %>%
  full_join(ci_neda_liwc, by = c("gender", "days_tweet", "categ"))

# One model per category and gender. The first data column (values_neda) is
# the response and values_baseline the control series. full_join() appends
# rows found only in the second table at the end, so the rows are put back
# into day order before the positional windows are applied.
# NOTE(review): CausalImpact estimates come from posterior sampling; without
# a set.seed() call the figures may differ slightly between knits.
ci <- d_second %>%
  arrange(days_tweet) %>%
  select(gender, categ, values_neda, values_baseline) %>%
  group_by(categ, gender) %>%
  nest() %>%
  ungroup() %>%
  select(gender, categ, data) %>%
  mutate(mod = map(data, ~CausalImpact::CausalImpact(.,
                                                     pre_period,
                                                     post_period)))

# Keep only the models that produced a summary table.
ci_resul <- ci %>%
  mutate(summary_mod = map(mod, "summary")) %>%
  filter(!map_lgl(summary_mod, is.null))

# No p-value filter here: every category that was significant in the overall
# analysis (categories_in_gender, defined in an earlier chunk) is reported for
# all genders, significant or not.
sig_cat <- ci_resul %>%
  mutate(p = map(summary_mod, "p")) %>%
  mutate(p = map_dbl(p, 1)) %>%
  filter(categ %in% categories_in_gender) %>%
  mutate(relative_effect = map(summary_mod, "RelEffect")) %>%
  mutate(relative_effect = map_dbl(relative_effect, 2))

# Sorting by gender first fixes the column order produced by pivot_wider()
# below, which the positional col.names rely on.
second_ap <- sig_cat %>%
  arrange(gender, desc(relative_effect)) %>%
  select(gender, categ,
         second_relative_effect = relative_effect, p_second = p)

# Wide table: one row per category, effect and p-value columns per gender.
second_ap %>%
  pivot_wider(id_cols = categ,
              values_from = c(second_relative_effect, p_second),
              names_from = gender) %>%
  mutate(categ = str_to_title(categ)) %>%
  mutate_at(vars(starts_with("second")), ~.*100) %>%
  mutate_if(is.numeric, ~round(., digits = 3)) %>%
  mutate_at(vars(starts_with("second")), function(x) {
    cell_spec(x, "html", color = spec_color(x), bold = TRUE)
  }) %>%
  kable("html", escape = FALSE,
        align = "lrr",
        col.names = c("Word Category", "FEMALE Relative Eff.(%)",
                      "MALE Relative Eff.(%)",
                      "UNKNOWN Relative Eff.(%)",
                      "FEMALE P Value",
                      "MALE P Value",
                      "UNKNOWN P Value")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
```

## Words inside categ:

The tables below show, for each category, the 4 words whose frequency increased the most and the 4 words whose frequency decreased the most. The first column is the word itself. `changes_total` is the "after" frequency minus the "before" frequency over all users; `changes_f`, `changes_m` and `changes_u` are the same difference restricted to female, male and unknown-gender users, respectively.

```{r }
library(tidyverse)

# Category table of the flat LIWC dictionary: category number and name.
categories_name <- read_delim(
  here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
  delim = "\t", skip = 1, col_names = c("number", "name"),
  n_max = 73
)

# Word table: x_1 is the dictionary entry, x_2..x_11 hold the category
# numbers the entry belongs to (all read as character).
# NOTE(review): the trailing "*" is stripped from the entries, so they are
# later matched as exact tokens rather than as prefixes -- confirm intended.
words_dic <- read_tsv(
  here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
  skip = 75, col_names = paste0("x_", 1:11),
  guess_max = 6000, col_types = "ccccccccccc"
) %>%
  mutate(x_1 = str_remove(x_1, "\\*"))

# Categories whose words are kept when the tweets are tokenised. The
# categories analysed in the sections below (risk, relig, money, body) are
# included as well; otherwise their tables would only see words that also
# belong to one of the other listed categories.
number_top_categ <- categories_name %>%
  filter(name %in% c("female", "family", "anx", "shehe", "affiliation",
                     "friend", "risk", "relig", "money", "body")) %>%
  pull(number)

# Every dictionary entry that belongs to at least one of those categories.
words_cat <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. %in% number_top_categ)) %>%
  pull(x_1)


# neda_timelapse ----------------------------------------------------------

library(lubridate)

# neda_hist <- read_rds(here::here("data", "NEDA_historical_twenty.rds")) %>% 
#   distinct() %>% 
#   filter(created_at_tweet >= ymd("2019-03-01")) %>% 
#   mutate(text = str_to_lower(text),
#          neda_related = str_detect(text, 
#                                    pattern = "#nedawareness|#comeasyouare|@nedastaff")) 

# gender_output <- read_tsv(here::here("data", "gender_extractor", 
#                                      "neda_liwc_gender_output.tsv"),
#                           col_names = c("id", "name", "name_proc", "gender"))

# Gender label per tweet and user, taken from the gender-annotated LIWC file.
gender_output <- here::here("data", "neda_liwc_gender_twenty.rds") %>%
  read_rds() %>%
  select(gender, id_tweet, id)
# first_tweet <- neda_hist %>% 
#   select(created_at_tweet, text, id, id_tweet, neda_related) %>% 
#   filter(neda_related) %>% 
#   arrange(created_at_tweet) %>% 
#   group_by(id) %>% 
#   slice(1) %>% 
#   ungroup() %>% 
#   select(cero_date = created_at_tweet, id) %>% 
#   filter(cero_date >= ymd("2019-01-01"))
# 
# neda_change <- neda_hist %>% 
#   select(id_tweet, text, created_at_tweet, id) %>% 
#   inner_join(first_tweet) %>% 
#   mutate(days_tweet = interval(start = cero_date, end = created_at_tweet),
#          days_tweet = round(time_length(days_tweet, unit = "days"))) %>% 
#   select(-cero_date) %>% 
#   filter(days_tweet >= -15, days_tweet <= 15)
# 
# neda_timelapse <- neda_change %>%
#   count(id, before_after = sign(days_tweet), sort = T) %>% 
#   mutate(before_after = case_when(before_after == -1 ~ "before",
#                                   before_after == 1 ~ "after",
#                                   TRUE ~ "cero")) %>% 
#   pivot_wider(values_from = n, names_from = before_after) %>% 
#   filter(before >= 15 & after >= 15) %>% 
#   select(-cero) %>% 
#   semi_join(x = neda_change, y = .)



# Stored tweet window, keeping only the rows that have a match in
# gender_output (joined on the columns the two tables share).
neda_timelapse <- inner_join(
  read_rds(here::here("data", "neda_timelapse_twenty.rds")),
  gender_output
)

library(tidytext)

# Text fragments stripped from the tweets before tokenising: t.co short links,
# HTML entities, the "RT" marker and leftover "https".
# The character class was previously written as "[A-Za-z]\\d]+", which asks
# for a letter, a digit and then literal "]" characters, so no link was ever
# removed. It is now a single class of letters and digits.
# NOTE(review): the tweets are lower-cased before this pattern is applied, so
# the upper-case "RT" alternative cannot match there.
replace_reg1 <- "https://t\\.co/[A-Za-z\\d]+|"
replace_reg2 <- "https://t\\.co/[A-Za-z\\d]+|&amp;|&lt;|&gt;|RT|https"
replace_reg <- paste0(replace_reg1, replace_reg2)

# Token separator: any character that is not a letter, digit, "_", "#", "@"
# or apostrophe, plus an apostrophe that is not followed by one of those.
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

# One row per token: lower-case the tweet, strip the unwanted fragments, then
# split on the separator pattern.
tidy_tweets <- neda_timelapse %>%
  mutate(text = str_replace_all(str_to_lower(text), replace_reg, "")) %>%
  unnest_tokens(output = word, input = text,
                token = "regex", pattern = unnest_reg)

# Dictionary words only, labelled by period; tokens from day 0 are left out.
tidy_words <- tidy_tweets %>%
  filter(word %in% words_cat, days_tweet != 0) %>%
  mutate(days_tweet = if_else(days_tweet < 0, "before", "after"))

# Reload the NEDA LIWC scores, this time keeping the tweet text.
neda_liwc <- here::here("data", "neda_liwc_twenty.rds") %>%
  read_rds() %>%
  filter(abs(days_tweet) <= 15) %>%
  select(-created_at_tweet)

```

### FEMALE 

```{r }
# female ------------------------------------------------------------------
# female -> 43 (LIWC category number)

words_female <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 43)) %>%
  pull(x_1)

# Count each word per period, once over all users (gender "t") and once per
# gender, spread the counts into before_*/after_* columns and take the
# after-minus-before difference for each group.
f <- bind_rows(
  tidy_words %>%
    filter(word %in% words_female) %>%
    count(days_tweet, word) %>%
    mutate(gender = "t"),
  tidy_words %>%
    filter(word %in% words_female) %>%
    count(gender, days_tweet, word)
) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# top_n() keeps ties, so either end can hold more than 4 words.
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

# distinct() removes the rows that would otherwise be listed twice when the
# category has fewer than 8 words and the two ends overlap.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()
```

### RISK

```{r}
# risk -> 85 (LIWC category number)

words_risk <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 85)) %>%
  pull(x_1)

# Count each word per period, once over all users (gender "t") and once per
# gender, spread the counts into before_*/after_* columns and take the
# after-minus-before difference for each group.
f <- bind_rows(
  tidy_words %>%
    filter(word %in% words_risk) %>%
    count(days_tweet, word) %>%
    mutate(gender = "t"),
  tidy_words %>%
    filter(word %in% words_risk) %>%
    count(gender, days_tweet, word)
) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# top_n() keeps ties, so either end can hold more than 4 words.
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

# distinct() removes the rows that would otherwise be listed twice when the
# category has fewer than 8 words and the two ends overlap.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()
```

### RELIGION
```{r}
# relig -> 114 (LIWC category number)

words_relig <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 114)) %>%
  pull(x_1)

# Count each word per period, once over all users (gender "t") and once per
# gender, spread the counts into before_*/after_* columns and take the
# after-minus-before difference for each group.
f <- bind_rows(
  tidy_words %>%
    filter(word %in% words_relig) %>%
    count(days_tweet, word) %>%
    mutate(gender = "t"),
  tidy_words %>%
    filter(word %in% words_relig) %>%
    count(gender, days_tweet, word)
) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# top_n() keeps ties, so either end can hold more than 4 words.
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

# distinct() removes the rows that would otherwise be listed twice when the
# category has fewer than 8 words and the two ends overlap.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()

```

### MONEY

```{r}
# money -> 113 (LIWC category number)

words_money <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 113)) %>%
  pull(x_1)

# Count each word per period, once over all users (gender "t") and once per
# gender, spread the counts into before_*/after_* columns and take the
# after-minus-before difference for each group.
f <- bind_rows(
  tidy_words %>%
    filter(word %in% words_money) %>%
    count(days_tweet, word) %>%
    mutate(gender = "t"),
  tidy_words %>%
    filter(word %in% words_money) %>%
    count(gender, days_tweet, word)
) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# top_n() keeps ties, so either end can hold more than 4 words.
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

# distinct() removes the rows that would otherwise be listed twice when the
# category has fewer than 8 words and the two ends overlap.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()

```

### BODY
```{r}
# body -> 71 (LIWC category number)

# The filter previously used 114, the relig category, so this section
# repeated the religion table instead of showing body words.
words_body <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 71)) %>%
  pull(x_1)

# Count each word per period, once over all users (gender "t") and once per
# gender, spread the counts into before_*/after_* columns and take the
# after-minus-before difference for each group.
f <- bind_rows(
  tidy_words %>%
    filter(word %in% words_body) %>%
    count(days_tweet, word) %>%
    mutate(gender = "t"),
  tidy_words %>%
    filter(word %in% words_body) %>%
    count(gender, days_tweet, word)
) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# top_n() keeps ties, so either end can hold more than 4 words.
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

# distinct() removes the rows that would otherwise be listed twice when the
# category has fewer than 8 words and the two ends overlap.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()

```


## COMMON WORDS IN TWEETS BY WORDS CATEGORY:

```{r }
# Most frequent words, per gender, in the tweets from day 0 onwards that
# score above zero on each of the five categories.
neda_liwc_gender %>%
  filter(days_tweet >= 0) %>%
  select(id_tweet, id, gender, female, risk, relig, body, money) %>%
  # One row per tweet and category; keep the categories the tweet scores on.
  pivot_longer(cols = female:money, names_to = "category") %>%
  filter(value > 0) %>%
  # Attach the tweet text from neda_liwc; the keys are written out so the
  # match does not depend on which column names the two tables share.
  left_join(neda_liwc %>%
              select(id, id_tweet, text),
            by = c("id_tweet", "id")) %>%
  # Drop every run of characters that ends in an ellipsis.
  mutate(text = str_remove_all(text, pattern = "[:graph:]+(…)")) %>%
  unnest_tokens(word, text, token = "tweets") %>%
  # Remove English stop words (with and without apostrophes), a few short
  # non-English function words, the digits 3 and 4, and the retweet marker.
  filter(!word %in% stop_words$word,
         !word %in% c("de", "4", "da", "la", "en", "le", "los", "3"),
         word != "rt",
         !word %in% str_remove_all(stop_words$word, "'")) %>%
  # mutate(word = case_when(category == "female" & word %in% c(words_female,
  #                                                            "womens") ~ NA_character_,
  #                         category == "family" & word %in% c(words_family,
  #                                                            "parents",
  #                                                            "brothers")~ NA_character_,
  #                         category == "anx" & word %in% words_anxiety ~ NA_character_,
  #                         TRUE ~ word)) %>%
  filter(!is.na(word),
         word != "amp") %>%
  count(category, gender, word, sort = TRUE) %>%
  group_by(gender, category) %>%
  # top_n() keeps ties, so a group can list more than 10 words.
  top_n(10, wt = n) %>%
  ungroup() %>%
  # One column per gender; NA marks a word outside that gender's top list.
  pivot_wider(id_cols = c(category, word), names_from = gender, values_from = n) %>%
  mutate(category = str_to_title(category)) %>%
  gt::gt(groupname_col = "category",
         rowname_col = "word")
  
```

