# Input files for this analysis: the effect-size table and the IAT subset.
ES_PATH <- here("data/study1c/bnc_vs_coca_es.csv")
IAT_PATH <- here("data/study1c/AIID_subset_exploratory.csv")

# Effect sizes: keep only the rows produced by one model file, and expose the
# `test` column under the name `domain`, which is the key used for the IAT
# data further down in this script.
lang_es <- ES_PATH %>%
  read_csv() %>%
  rename(domain = test) %>%
  filter(model_source == "trained_both_fasttext_5.csv")

#TARG_WORDS <- here("analyses/study1c/iat_stuff/all_target_words.csv") 
#targ_words <- read_csv(TARG_WORDS)
#count(targ_words, domain, cat_id)
raw_exp <- read_csv(IAT_PATH)

# Keep the columns we care about and drop rows with NAs.
exp_filtered <- raw_exp %>%
  # Rewrite the one domain label whose capitalisation differs; presumably this
  # makes it match the spelling used in `lang_es` -- TODO confirm.
  mutate(domain = case_when(
    domain == "Determinism - Free will" ~ "Determinism - Free Will",
    TRUE ~ domain
  )) %>%
  # Only domains that also appear in the effect-size table are retained.
  filter(domain %in% lang_es$domain) %>%
  # NOTE(review): columns 1 and 5 are picked by position. The model fitted
  # later in this script needs `task_order`, so presumably one of them is that
  # column -- confirm and switch to selecting by name.
  select(1, 5, D, residence, sex, age, block_order, domain,
         education, income, exclude_iat) %>%
  # Convert character columns to factors only after narrowing the columns, so
  # that discarded columns are not converted needlessly. `across()` replaces
  # the superseded `mutate_if()`.
  mutate(across(where(is.character), as.factor)) %>%
  # Drop sessions flagged for exclusion, then any row with a missing value in
  # the remaining columns.
  filter(!exclude_iat) %>%
  drop_na()

# Restrict to rows whose `residence` is "us" or "uk".
exp_filtered_countries <- filter(exp_filtered, residence %in% c("us", "uk"))

# Residualise D on the covariates, then average the residuals in two stages:
# first within each residence x domain cell, then across residences within
# each domain (an unweighted mean of the per-residence means).
iat_es <- exp_filtered_countries %>%
  add_residuals(
    model = lm(
      D ~ task_order + sex + age + block_order + education,
      data = exp_filtered_countries
    ),
    var = "resid"
  ) %>%
  group_by(residence, domain) %>%
  summarise(resid = mean(resid)) %>%
  group_by(domain) %>%
  summarise(resid = mean(resid))
# Plot the per-domain mean IAT residual against the effect size from
# `lang_es`, labelling each point with its domain and overlaying a linear fit.
iat_es %>%
  # Join explicitly on `domain` instead of relying on whichever columns the
  # two tables happen to share.
  left_join(lang_es, by = "domain") %>%
  ggplot(aes(x = effect_size, y = resid)) +
  geom_text(aes(label = domain), size = 2) +
  geom_smooth(method = "lm") +
  theme_classic()