Causal Impact
Number of total users:
neda_liwc %>%
select(id) %>%
distinct() %>%
nrow()
## [1] 431
| Word Category | Relative Eff.(%) | P Value |
|:--|--:|--:|
| Female | 33.031 | 0.001 |
| Risk | 20.29 | 0.001 |
| Relig | 17.409 | 0.015 |
| Anx | 15.936 | 0.046 |
| We | 15.857 | 0.004 |
| Money | 10.6 | 0.002 |
| Hear | 9.708 | 0.002 |
| Body | 8.89 | 0.003 |
| Nonflu | 7.344 | 0.019 |
| Power | 6.675 | 0.001 |
| Focusfuture | 6.63 | 0.006 |
| Relativ | 6.452 | 0.001 |
| Tentat | 4.874 | 0.012 |
| Focuspresent | 4.403 | 0.001 |
| Negemo | 4.233 | 0.004 |
| Work | 3.925 | 0.022 |
| Social | 3.632 | 0.001 |
| Informal | 3.155 | 0.002 |
| Focuspast | 2.694 | 0.040 |
| Posemo | -2.59 | 0.007 |
| Negate | -3.469 | 0.048 |
| See | -15.408 | 0.001 |
| Ingest | -26.529 | 0.025 |
| Friend | -27.165 | 0.001 |
| Filler | -42.13 | 0.001 |
CAUSAL IMPACT BY GENDER
Number of users by gender (about 400 in total):
neda_liwc_gender %>%
select(id, gender) %>%
distinct() %>%
count(gender)
## # A tibble: 3 x 2
## gender n
## <chr> <int>
## 1 f 173
## 2 m 65
## 3 u 193
Number of users in the baseline, by gender:
baseline_liwc_gender %>%
select(id, gender) %>%
distinct() %>%
count(gender)
## # A tibble: 3 x 2
## gender n
## <chr> <int>
## 1 f 1699
## 2 m 2066
## 3 u 2978
baseline_liwc_gender %>%
select(-text, -created_at_tweet, -id_tweet, -name) %>%
group_by(gender, id, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
select(-id) %>%
group_by(gender, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
pivot_longer(cols = fun:filler, names_to = "categ", values_to = "values_baseline") -> ci_baseline
neda_liwc_gender %>%
select(-id_tweet, -created_at_tweet) %>%
group_by(gender, id, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
select(-id) %>%
group_by(gender, days_tweet) %>%
summarise_all(mean) %>%
ungroup() %>%
pivot_longer(fun:filler, names_to = "categ",
values_to = "values_neda") -> ci_neda_liwc
pre_period <- c(1, 16)
post_period <- c(17, 31)
ci_baseline %>%
full_join(ci_neda_liwc) -> d_second
d_second %>%
# count(gender, days_tweet) %>%
# view()
# view()
# select(categ, values_neda, values_baseline) %>%
# nest(data = - categ) %>%
select(gender, categ, values_neda, values_baseline) %>%
# filter(gender == "m") %>% view()
group_by(categ, gender) %>%
nest() %>%
ungroup() %>%
select(gender, categ, data) %>%
mutate(mod = map(data, ~CausalImpact::CausalImpact(.,
pre_period,
post_period))) -> ci
ci %>%
mutate(summary_mod = map(mod, "summary")) %>%
filter(!map_lgl(summary_mod, is.null)) -> ci_resul
ci_resul %>%
mutate(p = map(summary_mod, "p")) %>%
mutate(p = map_dbl(p, 1)) %>%
filter(categ %in% categories_in_gender) %>%
mutate(relative_effect = map(summary_mod, "RelEffect")) %>%
mutate(relative_effect = map_dbl(relative_effect, 2))-> sig_cat
# sig_cat %>%
# filter(categ %in% sel_cat) -> sig_cat
sig_cat %>%
arrange(gender, desc(relative_effect)) %>%
select(gender, categ,
second_relative_effect = relative_effect, p_second = p) -> second_ap
second_ap %>%
pivot_wider(id_cols = categ,
values_from = c(second_relative_effect, p_second),
names_from = gender) %>%
mutate(categ = str_to_title(categ)) %>%
mutate_at(vars(starts_with("second")), ~.*100) %>%
mutate_if(is.numeric, ~round(., digits = 3)) %>%
mutate_at(vars(starts_with("second")), function(x){
cell_spec(x, "html", color = spec_color(x), bold = T)
}) %>%
kable("html", escape = F,
align = "lrr",
col.names = c("Word Category", "FEMALE Relative Eff.(%)",
"MALE Relative Eff.(%)",
"UNKNOWN Relative Eff.(%)",
"FEMALE P Value",
"MALE P Value",
"UNKNOWN P Value")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE)
| Word Category | FEMALE Relative Eff.(%) | MALE Relative Eff.(%) | UNKNOWN Relative Eff.(%) | FEMALE P Value | MALE P Value | UNKNOWN P Value |
|:--|--:|--:|--:|--:|--:|--:|
| Risk | 27.713 | 7.938 | 10.948 | 0.001 | 0.174 | 0.074 |
| Female | 26.164 | 22.59 | 34.262 | 0.001 | 0.005 | 0.001 |
| Relig | 25.409 | -2.293 | 15.585 | 0.014 | 0.393 | 0.102 |
| We | 17.588 | -0.364 | 18.167 | 0.028 | 0.471 | 0.002 |
| Hear | 10.457 | -3.575 | 10.76 | 0.003 | 0.255 | 0.003 |
| Power | 9.825 | 0.762 | 5.091 | 0.001 | 0.417 | 0.012 |
| Anx | 8.477 | 6.309 | 29.838 | 0.174 | 0.270 | 0.031 |
| Money | 8.209 | 31.643 | -0.583 | 0.029 | 0.001 | 0.464 |
| Tentat | 7.838 | 2.283 | 3.036 | 0.011 | 0.262 | 0.141 |
| Relativ | 7.719 | 1.424 | 2.714 | 0.001 | 0.256 | 0.070 |
| Informal | 6.653 | 4.143 | 0.35 | 0.001 | 0.001 | 0.391 |
| Work | 6.207 | 0.102 | 2.455 | 0.016 | 0.477 | 0.190 |
| Focuspast | 5.91 | 9.87 | -4.13 | 0.010 | 0.007 | 0.024 |
| Social | 4.541 | 4.411 | 3.713 | 0.001 | 0.002 | 0.003 |
| Focuspresent | 3.932 | 10.171 | 3.492 | 0.007 | 0.001 | 0.001 |
| Focusfuture | 2.705 | 9.076 | 13.746 | 0.267 | 0.002 | 0.001 |
| Nonflu | 2.614 | 15.021 | 8.292 | 0.326 | 0.042 | 0.079 |
| Negemo | 1.98 | 13.916 | 2.728 | 0.253 | 0.001 | 0.156 |
| Body | -0.127 | 31.309 | 15.016 | 0.494 | 0.001 | 0.001 |
| Negate | -6.244 | 1.931 | -0.057 | 0.080 | 0.346 | 0.483 |
| Posemo | -7.802 | -0.554 | -0.266 | 0.001 | 0.434 | 0.423 |
| Ingest | -17.991 | -27.943 | -12.164 | 0.188 | 0.012 | 0.207 |
| See | -20.432 | 1.343 | -17.405 | 0.001 | 0.419 | 0.001 |
| Friend | -34.369 | -16.925 | -24.181 | 0.001 | 0.092 | 0.008 |
| Filler | -40.55 | -34.464 | -52.18 | 0.002 | 0.007 | 0.001 |
Words inside categ:
The tables below show, for each category, the four words whose frequency increased the most and the four whose frequency decreased the most (tied words are all listed, so a table can have more than eight rows). The first column is the word. changes_total is the word’s “after” count minus its “before” count over all users; changes_f, changes_m and changes_u are the same difference restricted to female, male and unknown-gender users, respectively.
library(tidyverse)
read_delim(here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
delim = "\t", skip = 1, col_names = c("number", "name"),
n_max = 73) -> categories_name
read_tsv(here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
skip = 75, col_names = paste0(c("x"), c("_"), 1:11),
guess_max = 6000, col_types = "ccccccccccc") %>%
mutate(x_1 = str_remove(x_1, "\\*")) -> words_dic
categories_name %>%
filter(name %in% c("female", "family", "anx", "shehe", "affiliation", "friend")) %>%
pull(number) -> number_top_categ
words_dic %>%
filter_at(vars(-x_1), any_vars(. %in% number_top_categ)) %>%
pull(x_1) -> words_cat
# neda_timelapse ----------------------------------------------------------
library(lubridate)
# neda_hist <- read_rds(here::here("data", "NEDA_historical_twenty.rds")) %>%
# distinct() %>%
# filter(created_at_tweet >= ymd("2019-03-01")) %>%
# mutate(text = str_to_lower(text),
# neda_related = str_detect(text,
# pattern = "#nedawareness|#comeasyouare|@nedastaff"))
# gender_output <- read_tsv(here::here("data", "gender_extractor",
# "neda_liwc_gender_output.tsv"),
# col_names = c("id", "name", "name_proc", "gender"))
gender_output <- read_rds(here::here("data", "neda_liwc_gender_twenty.rds")) %>%
select(gender, id_tweet, id)
# first_tweet <- neda_hist %>%
# select(created_at_tweet, text, id, id_tweet, neda_related) %>%
# filter(neda_related) %>%
# arrange(created_at_tweet) %>%
# group_by(id) %>%
# slice(1) %>%
# ungroup() %>%
# select(cero_date = created_at_tweet, id) %>%
# filter(cero_date >= ymd("2019-01-01"))
#
# neda_change <- neda_hist %>%
# select(id_tweet, text, created_at_tweet, id) %>%
# inner_join(first_tweet) %>%
# mutate(days_tweet = interval(start = cero_date, end = created_at_tweet),
# days_tweet = round(time_length(days_tweet, unit = "days"))) %>%
# select(-cero_date) %>%
# filter(days_tweet >= -15, days_tweet <= 15)
#
# neda_timelapse <- neda_change %>%
# count(id, before_after = sign(days_tweet), sort = T) %>%
# mutate(before_after = case_when(before_after == -1 ~ "before",
# before_after == 1 ~ "after",
# TRUE ~ "cero")) %>%
# pivot_wider(values_from = n, names_from = before_after) %>%
# filter(before >= 15 & after >= 15) %>%
# select(-cero) %>%
# semi_join(x = neda_change, y = .)
read_rds(here::here("data", "neda_timelapse_twenty.rds")) %>%
inner_join(gender_output) -> neda_timelapse
library(tidytext)
replace_reg1 <- "https://t.co/[A-Za-z]\\d]+|"
replace_reg2 <- "https://t.co/[A-Za-z]\\d]+|&|<|>|RT|https"
replace_reg <- paste0(replace_reg1, replace_reg2)
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"
tidy_tweets <- neda_timelapse %>%
mutate(text = str_to_lower(text)) %>%
mutate(text = str_replace_all(text, replace_reg, "")) %>%
unnest_tokens(word, text, token = "regex", pattern = unnest_reg)
tidy_tweets %>%
filter(word %in% words_cat) %>%
mutate(days_tweet = case_when(days_tweet < 0 ~ "before",
days_tweet > 0 ~ "after",
TRUE ~ "cero")) %>%
filter(days_tweet != "cero") -> tidy_words
neda_liwc <- read_rds(here::here("data", "neda_liwc_twenty.rds")) %>%
select(-created_at_tweet) %>%
filter(abs(days_tweet) <= 15)
FEMALE
# female ------------------------------------------------------------------
# female -> 43
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 43)) %>%
pull(x_1) -> words_female
tidy_words %>%
filter(word %in% words_female) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_female) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word | changes_total | changes_f | changes_m | changes_u |
|:--|--:|--:|--:|--:|
| women | 816 | 323 | 110 | 383 |
| she | 489 | 181 | 67 | 241 |
| her | 431 | 234 | 51 | 146 |
| madam | 233 | 39 | 2 | 192 |
| girly | -9 | -4 | -3 | -2 |
| witch | -9 | -10 | -2 | 3 |
| gal | -10 | -7 | -2 | -1 |
| gals | -14 | -5 | 0 | -9 |
| lesbian | -56 | -24 | -6 | -26 |
RISK
# risk -> 85
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 85)) %>%
pull(x_1) -> words_risk
tidy_words %>%
filter(word %in% words_risk) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_risk) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word | changes_total | changes_f | changes_m | changes_u |
|:--|--:|--:|--:|--:|
| avoid | 92 | 39 | 19 | 34 |
| risk | 89 | 50 | 28 | 11 |
| threat | 51 | 32 | 7 | 12 |
| alarm | 11 | 4 | 2 | 5 |
| dread | 3 | 0 | 2 | 1 |
| doubt | 2 | 2 | 3 | -3 |
| inhibit | 1 | 1 | 0 | 0 |
| unsure | -3 | 0 | -2 | -1 |
RELIGION
# relig -> 114
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 114)) %>%
pull(x_1) -> words_relig
tidy_words %>%
filter(word %in% words_relig) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_relig) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word | changes_total | changes_f | changes_m | changes_u |
|:--|--:|--:|--:|--:|
| church | 31 | 20 | 9 | 2 |
| goddess | 4 | 2 | 0 | 2 |
| church | 31 | 20 | 9 | 2 |
| goddess | 4 | 2 | 0 | 2 |
MONEY
# money -> 113
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 113)) %>%
pull(x_1) -> words_money
tidy_words %>%
filter(word %in% words_money) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_money) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word | changes_total | changes_f | changes_m | changes_u |
|:--|--:|--:|--:|--:|
| donate | 10 | 6 | 1 | 3 |
| donation | 10 | 0 | 6 | 4 |
| donate | 10 | 6 | 1 | 3 |
| donation | 10 | 0 | 6 | 4 |
BODY
# body -> 71
words_dic %>%
filter_at(vars(-x_1), any_vars(. == 114)) %>%
pull(x_1) -> words_body
tidy_words %>%
filter(word %in% words_body) %>%
count(days_tweet, word) %>%
mutate(gender = "t") %>%
bind_rows(tidy_words %>%
filter(word %in% words_body) %>%
count(gender, days_tweet, word)) %>%
pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
mutate_if(is.numeric, ~replace_na(., 0)) %>%
mutate(changes_total = after_t - before_t,
changes_f = after_f - before_f,
changes_m = after_m - before_m,
changes_u = after_u - before_u) %>%
select(word, starts_with("changes")) %>%
arrange(desc(changes_total)) -> f
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)
f_top %>%
bind_rows(f_bottom) %>%
gt::gt()
| word | changes_total | changes_f | changes_m | changes_u |
|:--|--:|--:|--:|--:|
| church | 31 | 20 | 9 | 2 |
| goddess | 4 | 2 | 0 | 2 |
| church | 31 | 20 | 9 | 2 |
| goddess | 4 | 2 | 0 | 2 |
---
title: "Causal Impact" 
clean: true
output:
  bookdown::html_document2:
    number_sections: false
    code_download: true
    code_folding: hide
    self_contained: true
    toc: true
    toc_float: false
---

```{r setup, include=FALSE}
# Chunk defaults for the whole report: echo the code, but keep warnings and
# messages out of the rendered output.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)

# Attach the packages used by the chunks below in a single call.
xfun::pkg_attach(c("tidyverse", "lubridate", "kableExtra"))

# Default ggplot2 theme for any plot drawn after this point.
theme_set(theme_linedraw())
```

## Causal Impact 

```{r echo=FALSE}
# Baseline sample, read as stored.
baseline_liwc <- here::here("data", "baseline_liwc_twenty.rds") %>%
  read_rds()

# NEDA sample: keep only rows within 15 days of day 0 and drop the raw text
# and timestamp columns, which the aggregation below does not use.
neda_liwc <- here::here("data", "neda_liwc_twenty.rds") %>%
  read_rds() %>%
  filter(abs(days_tweet) <= 15) %>%
  select(-text, -created_at_tweet)

# Word categories that are allowed to appear in the results table.
sel_cat <- c(
  "i", "we", "you", "shehe", "they", "ipron", "negate", "compare",
  "posemo", "negemo", "anx", "anger", "sad", "social", "family", "friend",
  "female", "male", "insight", "cause", "discrep", "tentat", "certain",
  "differ", "see", "hear", "feel", "body", "health", "sexual", "ingest",
  "affiliation", "achiev", "power", "reward", "risk", "focuspast",
  "focuspresent", "focusfuture", "relativ", "work", "leisure", "home",
  "money", "relig", "death", "informal", "swear", "assent", "nonflu",
  "filler"
)
```

Number of total users:
```{r}
# One row per distinct user id; the row count is the number of users.
neda_liwc %>%
  distinct(id) %>%
  nrow()
```

```{r echo=FALSE}
# Daily baseline series: mean of every score column per user and day, then the
# mean of those user means per day, reshaped to one row per (day, category).
ci_baseline <- baseline_liwc %>%
  select(-text, -created_at_tweet, -id_tweet) %>%
  group_by(days_tweet, id) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(cols = fun:filler, names_to = "categ",
               values_to = "values_baseline")

# Same two-step daily average for the NEDA users.
ci_neda_liwc <- neda_liwc %>%
  select(-id_tweet) %>%
  group_by(days_tweet, id) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(-days_tweet, names_to = "categ", values_to = "values_neda")

# The periods are ROW POSITIONS inside each category's series, not days_tweet
# values: with all 31 days present, rows 1-16 are days -15..0 and rows 17-31
# are days 1..15.
pre_period <- c(1, 16)
post_period <- c(17, 31)

# Join on explicit keys and sort chronologically, so the positional periods
# above never depend on the incidental row order of the join result.
d_second <- ci_baseline %>%
  inner_join(ci_neda_liwc, by = c("days_tweet", "categ")) %>%
  arrange(days_tweet)

# One model per category. The response must be the first column of the data
# handed to CausalImpact, hence values_neda before values_baseline.
ci <- d_second %>%
  select(categ, values_neda, values_baseline) %>%
  nest(data = -categ) %>%
  mutate(mod = map(data, function(series) {
    CausalImpact::CausalImpact(series, pre_period, post_period)
  }))

# Keep only the fits that produced a summary.
ci_resul <- ci %>%
  mutate(summary_mod = map(mod, "summary")) %>%
  filter(!map_lgl(summary_mod, is.null))

# Significant categories (p < 0.05) that are also on the sel_cat list.
# p is element 1 of the summary's `p` column and the relative effect is
# element 2 of `RelEffect` (presumably the "Average" and "Cumulative" rows of
# the CausalImpact summary -- TODO confirm).
sig_cat <- ci_resul %>%
  mutate(p = map_dbl(summary_mod, list("p", 1))) %>%
  filter(p < 0.05) %>%
  mutate(relative_effect = map_dbl(summary_mod, list("RelEffect", 2))) %>%
  filter(categ %in% sel_cat)

# Reused by the per-gender analysis to show the same set of categories.
categories_in_gender <- pull(sig_cat, categ)

# Table input, largest relative effect first.
second_ap <- sig_cat %>%
  arrange(desc(relative_effect)) %>%
  select(categ, second_relative_effect = relative_effect, p_second = p)

# Render the pooled results: title-case names, effect as a percentage, all
# numbers rounded to 3 digits, and the effect column coloured by its value.
second_ap %>%
  mutate(
    categ = str_to_title(categ),
    second_relative_effect = second_relative_effect * 100
  ) %>%
  mutate_if(is.numeric, ~ round(., digits = 3)) %>%
  mutate(
    second_relative_effect = cell_spec(
      second_relative_effect, "html",
      color = spec_color(second_relative_effect),
      bold = TRUE
    )
  ) %>%
  kable(
    "html",
    escape = FALSE, # cell_spec() already produced HTML
    align = "lrr",
    col.names = c("Word Category", "Relative Eff.(%)", "P Value")
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  )
```

## CAUSAL IMPACT BY GENDER

```{r echo=FALSE}
# Baseline sample with a gender label per user.
baseline_liwc_gender <- here::here("data", "baseline_liwc_gender_twenty.rds") %>%
  read_rds()

# NEDA sample with a gender label per user, limited to the +/- 15 day window.
# No columns are dropped here; the aggregation chunk removes what it must.
neda_liwc_gender <- here::here("data", "neda_liwc_gender_twenty.rds") %>%
  read_rds() %>%
  filter(abs(days_tweet) <= 15)

# Same category list as in the pooled analysis.
sel_cat <- c(
  "i", "we", "you", "shehe", "they", "ipron", "negate", "compare",
  "posemo", "negemo", "anx", "anger", "sad", "social", "family", "friend",
  "female", "male", "insight", "cause", "discrep", "tentat", "certain",
  "differ", "see", "hear", "feel", "body", "health", "sexual", "ingest",
  "affiliation", "achiev", "power", "reward", "risk", "focuspast",
  "focuspresent", "focusfuture", "relativ", "work", "leisure", "home",
  "money", "relig", "death", "informal", "swear", "assent", "nonflu",
  "filler"
)
```

Number of users by gender (about 400 in total):
```{r}
# Distinct (user, gender) pairs, tallied per gender label.
neda_liwc_gender %>%
  distinct(id, gender) %>%
  count(gender)
```

Number of users in the baseline, by gender:
```{r}
# Distinct (user, gender) pairs in the baseline, tallied per gender label.
baseline_liwc_gender %>%
  distinct(id, gender) %>%
  count(gender)
```

```{r }
# Daily baseline series per gender: mean per user and day, then the mean of
# those user means per gender and day, one row per (gender, day, category).
ci_baseline <- baseline_liwc_gender %>%
  select(-text, -created_at_tweet, -id_tweet, -name) %>%
  group_by(gender, id, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(gender, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(cols = fun:filler, names_to = "categ",
               values_to = "values_baseline")

# Same two-step daily average for the NEDA users.
ci_neda_liwc <- neda_liwc_gender %>%
  select(-id_tweet, -created_at_tweet) %>%
  group_by(gender, id, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-id) %>%
  group_by(gender, days_tweet) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  pivot_longer(fun:filler, names_to = "categ",
               values_to = "values_neda")

# Row positions inside each (category, gender) series, not days_tweet values:
# with all 31 days present, rows 1-16 are days -15..0 and rows 17-31 are
# days 1..15.
pre_period <- c(1, 16)
post_period <- c(17, 31)

# full_join() places rows that exist only in ci_neda_liwc after all other
# rows, i.e. out of time order. Because the models address the series by row
# position, join on explicit keys and restore the chronological order.
d_second <- ci_baseline %>%
  full_join(ci_neda_liwc, by = c("gender", "days_tweet", "categ")) %>%
  arrange(gender, days_tweet)

# One model per (category, gender) pair. Inside each nested data frame the
# response (values_neda) comes first, followed by the baseline control series.
ci <- d_second %>%
  select(gender, categ, values_neda, values_baseline) %>%
  group_by(categ, gender) %>%
  nest() %>%
  ungroup() %>%
  select(gender, categ, data) %>%
  mutate(mod = map(data, function(series) {
    CausalImpact::CausalImpact(series, pre_period, post_period)
  }))

# Keep only the fits that produced a summary.
ci_resul <- ci %>%
  mutate(summary_mod = map(mod, "summary")) %>%
  filter(!map_lgl(summary_mod, is.null))

# No p-value or sel_cat filter here: every category that was significant in
# the pooled analysis (categories_in_gender) is shown for all genders, so the
# two tables cover the same categories.
sig_cat <- ci_resul %>%
  mutate(p = map_dbl(summary_mod, list("p", 1))) %>%
  filter(categ %in% categories_in_gender) %>%
  mutate(relative_effect = map_dbl(summary_mod, list("RelEffect", 2)))

# Table input, ordered by gender and then by decreasing relative effect.
second_ap <- sig_cat %>%
  arrange(gender, desc(relative_effect)) %>%
  select(gender, categ,
         second_relative_effect = relative_effect, p_second = p)

# Wide table: one row per category, with a relative-effect column and a
# p-value column for each gender label.
second_ap %>%
  pivot_wider(id_cols = categ,
              values_from = c(second_relative_effect, p_second),
              names_from = gender) %>%
  mutate(categ = str_to_title(categ)) %>%
  # Relative effects as percentages.
  mutate_at(vars(starts_with("second")), ~ . * 100) %>%
  mutate_if(is.numeric, ~ round(., digits = 3)) %>%
  # Colour each effect column by its own values.
  mutate_at(vars(starts_with("second")), function(x) {
    cell_spec(x, "html", color = spec_color(x), bold = TRUE)
  }) %>%
  kable("html", escape = FALSE,
        # One alignment letter per column: the label plus six numeric columns.
        align = "lrrrrrr",
        col.names = c("Word Category", "FEMALE Relative Eff.(%)",
                      "MALE Relative Eff.(%)",
                      "UNKNOWN Relative Eff.(%)",
                      "FEMALE P Value",
                      "MALE P Value",
                      "UNKNOWN P Value")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
```

## Words inside categ:

The tables below show, for each category, the four words whose frequency increased the most and the four whose frequency decreased the most (tied words are all listed, so a table can have more than eight rows). The first column is the word. `changes_total` is the word's "after" count minus its "before" count over all users; `changes_f`, `changes_m` and `changes_u` are the same difference restricted to female, male and unknown-gender users, respectively.

```{r }
library(tidyverse)

# Category header of the dictionary file: 73 (number, name) rows that follow
# the first line of the file.
categories_name <- read_delim(
  here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
  delim = "\t", skip = 1, col_names = c("number", "name"),
  n_max = 73
)

# Word entries: x_1 is the dictionary entry, x_2..x_11 hold the ids of the
# categories it belongs to (all read as character).
# NOTE(review): the "*" is removed from x_1, and entries are later compared
# with `%in%`, so an entry such as "abc*" only matches the exact token "abc".
# Confirm that exact matching is intended.
words_dic <- read_tsv(
  here::here("data", "liwc", "LIWC2015_English_Flat.dic"),
  skip = 75, col_names = paste0(c("x"), c("_"), 1:11),
  guess_max = 6000, col_types = "ccccccccccc"
) %>%
  mutate(x_1 = str_remove(x_1, "\\*"))

# Categories whose words are kept when the tweets are tokenised. The list must
# contain every category broken down in the sections below (female, risk,
# relig, money, body); otherwise their tables only show words that happen to
# belong to one of the other listed categories as well. The six categories
# used previously are kept so that nothing that was selected before is lost.
number_top_categ <- categories_name %>%
  filter(name %in% c("female", "risk", "relig", "money", "body",
                     "family", "anx", "shehe", "affiliation", "friend")) %>%
  pull(number)

# Every dictionary entry tagged with at least one of those categories.
words_cat <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. %in% number_top_categ)) %>%
  pull(x_1)


# neda_timelapse ----------------------------------------------------------

library(lubridate)

# neda_hist <- read_rds(here::here("data", "NEDA_historical_twenty.rds")) %>% 
#   distinct() %>% 
#   filter(created_at_tweet >= ymd("2019-03-01")) %>% 
#   mutate(text = str_to_lower(text),
#          neda_related = str_detect(text, 
#                                    pattern = "#nedawareness|#comeasyouare|@nedastaff")) 

# gender_output <- read_tsv(here::here("data", "gender_extractor", 
#                                      "neda_liwc_gender_output.tsv"),
#                           col_names = c("id", "name", "name_proc", "gender"))

# Gender label per tweet and user, taken from the gender-annotated NEDA data.
gender_output <- here::here("data", "neda_liwc_gender_twenty.rds") %>%
  read_rds() %>%
  select(gender, id_tweet, id)
# first_tweet <- neda_hist %>% 
#   select(created_at_tweet, text, id, id_tweet, neda_related) %>% 
#   filter(neda_related) %>% 
#   arrange(created_at_tweet) %>% 
#   group_by(id) %>% 
#   slice(1) %>% 
#   ungroup() %>% 
#   select(cero_date = created_at_tweet, id) %>% 
#   filter(cero_date >= ymd("2019-01-01"))
# 
# neda_change <- neda_hist %>% 
#   select(id_tweet, text, created_at_tweet, id) %>% 
#   inner_join(first_tweet) %>% 
#   mutate(days_tweet = interval(start = cero_date, end = created_at_tweet),
#          days_tweet = round(time_length(days_tweet, unit = "days"))) %>% 
#   select(-cero_date) %>% 
#   filter(days_tweet >= -15, days_tweet <= 15)
# 
# neda_timelapse <- neda_change %>%
#   count(id, before_after = sign(days_tweet), sort = T) %>% 
#   mutate(before_after = case_when(before_after == -1 ~ "before",
#                                   before_after == 1 ~ "after",
#                                   TRUE ~ "cero")) %>% 
#   pivot_wider(values_from = n, names_from = before_after) %>% 
#   filter(before >= 15 & after >= 15) %>% 
#   select(-cero) %>% 
#   semi_join(x = neda_change, y = .)



# Stored tweet window, restricted to the tweets that have a gender label
# (natural join on the columns shared with gender_output).
neda_timelapse <- inner_join(
  read_rds(here::here("data", "neda_timelapse_twenty.rds")),
  gender_output
)

library(tidytext)

# Text removed before tokenising: t.co links, HTML entities, and the
# leftovers "RT" / "https". The character class used to read `[A-Za-z]\\d]+`
# (stray "]"), which almost never matches a link; it is now `[A-Za-z\\d]+`.
# NOTE(review): the text is lower-cased before this pattern is applied, so the
# case-sensitive "RT" alternative cannot match.
replace_reg1 <- "https://t.co/[A-Za-z\\d]+|"
replace_reg2 <- "https://t.co/[A-Za-z\\d]+|&amp;|&lt;|&gt;|RT|https"
replace_reg <- paste0(replace_reg1, replace_reg2)

# Token separator: any character that is not a letter, digit, "_", "#", "@"
# or an apostrophe, plus an apostrophe not followed by one of those.
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

# One row per token, keeping the tweet-level columns of neda_timelapse.
tidy_tweets <- neda_timelapse %>%
  mutate(text = str_to_lower(text)) %>%
  mutate(text = str_replace_all(text, replace_reg, "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = unnest_reg)

# Tokens that belong to the selected categories, labelled by whether the tweet
# falls before or after day 0; tokens from day 0 itself are dropped.
tidy_words <- tidy_tweets %>%
  filter(word %in% words_cat) %>%
  mutate(days_tweet = case_when(days_tweet < 0 ~ "before",
                                days_tweet > 0 ~ "after",
                                TRUE ~ "cero")) %>%
  filter(days_tweet != "cero")

# Reload the NEDA data, this time keeping `text` (only the timestamp column is
# dropped), limited to the +/- 15 day window.
neda_liwc <- here::here("data", "neda_liwc_twenty.rds") %>%
  read_rds() %>%
  filter(abs(days_tweet) <= 15) %>%
  select(-created_at_tweet)

```

### FEMALE 

```{r }
# female ------------------------------------------------------------------
# LIWC category id: female -> 43

# Dictionary entries tagged with category 43 in any category column.
words_female <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 43)) %>%
  pull(x_1)

# Before/after counts per word, for all users (gender "t") and per gender
# label, spread into one column per period/gender pair; missing combinations
# count as 0. Each change is the "after" count minus the "before" count.
f <- tidy_words %>%
  filter(word %in% words_female) %>%
  count(days_tweet, word) %>%
  mutate(gender = "t") %>%
  bind_rows(tidy_words %>%
              filter(word %in% words_female) %>%
              count(gender, days_tweet, word)) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases (ties are kept).
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

# With fewer than eight words the two selections overlap; distinct() keeps
# each word from being listed twice.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()
```

### RISK

```{r}
# LIWC category id: risk -> 85

# Dictionary entries tagged with category 85 in any category column.
words_risk <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 85)) %>%
  pull(x_1)

# Before/after counts per word, for all users (gender "t") and per gender
# label, spread into one column per period/gender pair; missing combinations
# count as 0. Each change is the "after" count minus the "before" count.
f <- tidy_words %>%
  filter(word %in% words_risk) %>%
  count(days_tweet, word) %>%
  mutate(gender = "t") %>%
  bind_rows(tidy_words %>%
              filter(word %in% words_risk) %>%
              count(gender, days_tweet, word)) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases (ties are kept).
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

# With fewer than eight words the two selections overlap; distinct() keeps
# each word from being listed twice.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()
```

### RELIGION
```{r}
# LIWC category id: relig -> 114

# Dictionary entries tagged with category 114 in any category column.
words_relig <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 114)) %>%
  pull(x_1)

# Before/after counts per word, for all users (gender "t") and per gender
# label, spread into one column per period/gender pair; missing combinations
# count as 0. Each change is the "after" count minus the "before" count.
f <- tidy_words %>%
  filter(word %in% words_relig) %>%
  count(days_tweet, word) %>%
  mutate(gender = "t") %>%
  bind_rows(tidy_words %>%
              filter(word %in% words_relig) %>%
              count(gender, days_tweet, word)) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases (ties are kept).
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

# With fewer than eight words the two selections overlap; distinct() keeps
# each word from being listed twice.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()

```

### MONEY

```{r}
# LIWC category id: money -> 113

# Dictionary entries tagged with category 113 in any category column.
words_money <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 113)) %>%
  pull(x_1)

# Before/after counts per word, for all users (gender "t") and per gender
# label, spread into one column per period/gender pair; missing combinations
# count as 0. Each change is the "after" count minus the "before" count.
f <- tidy_words %>%
  filter(word %in% words_money) %>%
  count(days_tweet, word) %>%
  mutate(gender = "t") %>%
  bind_rows(tidy_words %>%
              filter(word %in% words_money) %>%
              count(gender, days_tweet, word)) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases (ties are kept).
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

# With fewer than eight words the two selections overlap; distinct() keeps
# each word from being listed twice.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()

```

### BODY
```{r}
# LIWC category id: body -> 71

# Dictionary entries tagged with category 71 in any category column.
# (This used to filter on 114, the relig id, so the BODY table repeated the
# RELIGION table.)
words_body <- words_dic %>%
  filter_at(vars(-x_1), any_vars(. == 71)) %>%
  pull(x_1)

# Body tokens labelled before/after day 0 (day 0 itself is dropped). They are
# taken from tidy_tweets so the result does not depend on which categories
# words_cat pre-selects for tidy_words.
tidy_words_body <- tidy_tweets %>%
  filter(word %in% words_body) %>%
  mutate(days_tweet = case_when(days_tweet < 0 ~ "before",
                                days_tweet > 0 ~ "after",
                                TRUE ~ "cero")) %>%
  filter(days_tweet != "cero")

# Before/after counts per word, for all users (gender "t") and per gender
# label, spread into one column per period/gender pair; missing combinations
# count as 0. Each change is the "after" count minus the "before" count.
f <- tidy_words_body %>%
  count(days_tweet, word) %>%
  mutate(gender = "t") %>%
  bind_rows(tidy_words_body %>%
              count(gender, days_tweet, word)) %>%
  pivot_wider(names_from = c(days_tweet, gender), values_from = n) %>%
  mutate_if(is.numeric, ~ replace_na(., 0)) %>%
  mutate(changes_total = after_t - before_t,
         changes_f = after_f - before_f,
         changes_m = after_m - before_m,
         changes_u = after_u - before_u) %>%
  select(word, starts_with("changes")) %>%
  arrange(desc(changes_total))

# Four largest increases and four largest decreases (ties are kept).
f_top <- f %>% top_n(4, wt = changes_total)
f_bottom <- f %>% top_n(-4, wt = changes_total)

# With fewer than eight words the two selections overlap; distinct() keeps
# each word from being listed twice.
f_top %>%
  bind_rows(f_bottom) %>%
  distinct() %>%
  gt::gt()

```


## COMMON WORDS IN TWEETS BY WORDS CATEGORY:

```{r }
# Ten most frequent words per gender and category among the tweets from day 0
# onwards that score above zero in that category.
neda_liwc_gender %>%
  filter(days_tweet >= 0) %>%
  select(id_tweet, id, gender, female, risk, relig, body, money) %>%
  # One row per (tweet, category); keep the categories the tweet scores in.
  pivot_longer(cols = female:money, names_to = "category") %>%
  filter(value > 0) %>%
  # Bring the tweet text back in; neda_liwc still carries `text` at this point.
  left_join(select(neda_liwc, id, id_tweet, text)) %>%
  # Remove any run of visible characters that ends in an ellipsis character.
  mutate(text = str_remove_all(text, pattern = "[:graph:]+(…)")) %>%
  unnest_tokens(word, text, token = "tweets") %>%
  # Drop stop words (with and without apostrophes), a few extra short tokens,
  # the retweet marker and the "amp" left over from "&amp;".
  # An earlier version also blanked out each category's own dictionary words;
  # that step is disabled.
  filter(
    !word %in% stop_words$word,
    !word %in% c("de", "4", "da", "la", "en", "le", "los", "3"),
    word != "rt",
    !word %in% str_remove_all(stop_words$word, "'"),
    !is.na(word),
    word != "amp"
  ) %>%
  count(category, gender, word, sort = TRUE) %>%
  group_by(gender, category) %>%
  top_n(10, wt = n) %>%
  ungroup() %>%
  # One column per gender label, rows grouped by category in the table.
  pivot_wider(id_cols = c(category, word),
              names_from = gender, values_from = n) %>%
  mutate(category = str_to_title(category)) %>%
  gt::gt(groupname_col = "category",
         rowname_col = "word")
  
```

