Compare continuous measure to binary measure

# Load the binary grammatical-gender coding, keeping only the language name
# and the `wikipedia_grammar_type` column. Two compound language labels are
# shortened to single names (presumably so they match the `language` key of
# the continuous measure they are joined to -- confirm against that file).
# case_when() treats an NA comparison as "no match", so a missing
# language_name passes through unchanged instead of aborting the recode.
binary_gender <- read_csv("gender_grammar.csv") %>%
  select(language_name, wikipedia_grammar_type) %>%
  mutate(
    language_name = case_when(
      language_name == "spanish; castilian" ~ "spanish",
      language_name == "dutch; flemish" ~ "dutch",
      TRUE ~ language_name
    )
  )


# Read the continuous gender measure, then attach the binary grammar type to
# each of its rows. Because this is a left join, languages with no match in
# binary_gender are kept and get NA for the joined column.
continuous_gender <- read_csv(file = "continuous_gender.csv")

all_gender <- continuous_gender %>%
  left_join(binary_gender, by = c("language" = "language_name"))

Compare continuous gender measure to behavioral measure

# Load the by-language behavioral data, lower-case the language names, and
# keep only the columns used below. The name column is renamed to `language`
# inside select() so it lines up with the key used by the gender tables.
behavioral <- read_csv("/Users/mollylewis/Documents/research/Projects/1_in_progress/IATLANG/exploratory_analyses/7_age_controls/by_language_df.csv") %>%
  mutate(language_name = tolower(language_name)) %>%
  select(
    language = language_name,
    es_behavioral_iat_resid_simple,
    es_behavioral_explicit_resid_simple,
    ggi,
    wps
  )

# Combine behavioral and gender measures, keeping languages found in either
# table. No `by` is given, so dplyr joins on every column name the two tables
# share and reports the chosen keys in a message.
# The result is converted to a base data.frame: later code in this file
# subsets it with `[rows, "col"]` and passes the result straight to t.test().
full_df <- data.frame(full_join(behavioral, all_gender))

# Scatter of the behavioral IAT measure against num_dif_trans, one text label
# per language, with a linear-model fit drawn on top.
full_df %>%
  ggplot(aes(x = num_dif_trans, y = es_behavioral_iat_resid_simple)) +
  geom_label(aes(label = language)) +
  geom_smooth(method = "lm") +
  theme_classic()

# Derive a two-level grouping from the continuous measure: languages with
# num_dif_trans above zero are "gendered", all others "not_gendered".
# Rows missing either the continuous measure or the behavioral IAT measure
# are then dropped.
binary_df <- full_df %>%
  mutate(
    empirical_binary = ifelse(num_dif_trans > 0, "gendered", "not_gendered")
  ) %>%
  filter(!(is.na(num_dif_trans) | is.na(es_behavioral_iat_resid_simple)))

# Cross-tabulate the empirical grouping against the Wikipedia grammar type
# and render the counts as a table.
binary_df %>%
  count(empirical_binary, wikipedia_grammar_type) %>%
  kable()
empirical_binary wikipedia_grammar_type n
gendered CN 2
gendered MF 5
gendered MFN 3
not_gendered CN 1
not_gendered MFN 1
not_gendered none 9
# Unpaired two-sample t-test of the behavioral IAT measure: "gendered"
# languages (x) versus all others (y).
# The `[rows, "col"]` subsetting relies on binary_df being a base data.frame,
# so each argument is a plain numeric vector.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
t.test(binary_df[binary_df$empirical_binary == "gendered", "es_behavioral_iat_resid_simple"],
       binary_df[binary_df$empirical_binary != "gendered", "es_behavioral_iat_resid_simple"],
       paired = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  binary_df[binary_df$empirical_binary == "gendered", "es_behavioral_iat_resid_simple"] and binary_df[binary_df$empirical_binary != "gendered", "es_behavioral_iat_resid_simple"]
## t = 2.6408, df = 18.908, p-value = 0.01616
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.004836846 0.041860433
## sample estimates:
##    mean of x    mean of y 
##  0.007512373 -0.015836267
# Per-group summary of the behavioral IAT measure, drawn as a point with an
# interval for each empirical_binary level.
# NOTE(review): multi_boot_standard is not defined in this section; the plot
# relies on it returning `mean`, `ci_lower` and `ci_upper` columns per group
# -- confirm against its documentation.
# `TRUE` is spelled out because `T` is a reassignable variable.
binary_df %>%
  group_by(empirical_binary) %>%
  multi_boot_standard(col = "es_behavioral_iat_resid_simple",
                      na.rm = TRUE) %>%
  ggplot(aes(x = empirical_binary, y = mean)) +
  ylab("age-residualized behavioral IAT") +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  theme_classic()

# Per-group summary of the `ggi` column, drawn as a point with an interval
# for each empirical_binary level.
# NOTE(review): multi_boot_standard is not defined in this section; the plot
# relies on it returning `mean`, `ci_lower` and `ci_upper` columns per group
# -- confirm against its documentation.
# `TRUE` is spelled out because `T` is a reassignable variable.
binary_df %>%
  group_by(empirical_binary) %>%
  multi_boot_standard(col = "ggi",
                      na.rm = TRUE) %>%
  ggplot(aes(x = empirical_binary, y = mean)) +
  ylab("ggi") +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  theme_classic()

# Unpaired two-sample t-test of the behavioral explicit measure: "gendered"
# languages (x) versus all others (y).
# The `[rows, "col"]` subsetting relies on binary_df being a base data.frame,
# so each argument is a plain numeric vector.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
t.test(binary_df[binary_df$empirical_binary == "gendered", "es_behavioral_explicit_resid_simple"],
       binary_df[binary_df$empirical_binary != "gendered", "es_behavioral_explicit_resid_simple"],
       paired = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  binary_df[binary_df$empirical_binary == "gendered", "es_behavioral_explicit_resid_simple"] and binary_df[binary_df$empirical_binary != "gendered", "es_behavioral_explicit_resid_simple"]
## t = 2.3847, df = 18.816, p-value = 0.02778
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01683944 0.25980743
## sample estimates:
##   mean of x   mean of y 
##  0.05642592 -0.08189752
# Per-group summary of the behavioral explicit measure, drawn as a point with
# an interval for each empirical_binary level.
# NOTE(review): multi_boot_standard is not defined in this section; the plot
# relies on it returning `mean`, `ci_lower` and `ci_upper` columns per group
# -- confirm against its documentation.
# `TRUE` is spelled out because `T` is a reassignable variable.
binary_df %>%
  group_by(empirical_binary) %>%
  multi_boot_standard(col = "es_behavioral_explicit_resid_simple",
                      na.rm = TRUE) %>%
  ggplot(aes(x = empirical_binary, y = mean)) +
  ylab("age-residualized behavioral explicit") +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  theme_classic()