analysis

# data <- read.csv("data_anon0109_coded.csv")
# corpus <- read.csv("corpusE.csv")
# data_corpus <- left_join(data, corpus, by = "Cue_renamed")
# 
# data_responses <- data_corpus %>%
#   group_by(subject, Cue_renamed, responses_theme) %>%
#   summarize(theme_resp_ratio_bysubj = mean(responses_theme))
# 
# data_freq <- corpus %>%
#   mutate(theme_ratio = theme.frequency / (theme.frequency + tax.frequency)) %>%
#   filter(!is.na(theme_ratio)) %>%
#   select(Cue_renamed, theme_ratio)
# 
# data_sum <- inner_join(data_responses, data_freq, by = "Cue_renamed") #removed lilypad

data <- read.csv("dataUSVN_corpusEV.csv") %>%
  mutate(theme_ratio_E = theme.frequency_E / (theme.frequency_E + tax.frequency_E)) %>%
  mutate(theme_ratio_V = theme.frequency_V / (theme.frequency_V + tax.frequency_V))

bias <- read.csv("bias_EV.csv") %>%
  select(Cue_renamed, Bias_E, Bias_V)

data <- left_join(data, bias, by = "Cue_renamed")

corpusE_omit <- read.csv("corpusE_0.csv") %>%
  filter(tax.frequency == 0 & theme.frequency == 0)

corpusV_omit <- read.csv("corpusV_0.csv") %>%
  filter(tax.frequency == 0 & theme.frequency == 0)

triads_omit <- append(corpusE_omit$Cue_renamed, corpusV_omit$Cue_renamed)

data <- data %>%
  filter(!(Cue_renamed %in% triads_omit))

# data_corpus <- left_join(data, corpusE, by = "Cue_renamed")
# data_corpus <- inner_join(data_corpus, corpusV, by = "Cue_renamed")
# 
# data_sum <- data_corpus %>%
#   group_by(subject, Cue_renamed, responses_theme, theme_ratio_V, theme_ratio_E) %>%
#   summarize(theme_resp_ratio_bysubj = mean(responses_theme))

#write.csv(data_sum, "dataSumUS_0115.csv")

US responses

Regressions

data_model <- data %>%
  filter(language == "English")

mV = glmer(responses_theme ~ theme_ratio_V + (1 | subject) + (1 | Cue_renamed), data = data_model, family="binomial")
plot(fitted(mV), residuals(mV))

summary(mV)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: responses_theme ~ theme_ratio_V + (1 | subject) + (1 | Cue_renamed)
##    Data: data_model
## 
##      AIC      BIC   logLik deviance df.resid 
##  10003.5  10032.1  -4997.7   9995.5     9499 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7628 -0.6305  0.2538  0.6144 13.7471 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 1.071    1.035   
##  Cue_renamed (Intercept) 1.706    1.306   
## Number of obs: 9503, groups:  subject, 132; Cue_renamed, 72
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.5391     0.2551  -2.113 0.034617 *  
## theme_ratio_V   1.2932     0.3651   3.543 0.000396 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## theme_rat_V -0.707

mE = glmer(responses_theme ~ theme_ratio_E + (1 | subject) + (1 | Cue_renamed), data = data_model, family="binomial")
plot(fitted(mE), residuals(mE))

summary(mE)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: responses_theme ~ theme_ratio_E + (1 | subject) + (1 | Cue_renamed)
##    Data: data_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   9997.8  10026.4  -4994.9   9989.8     9499 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7324 -0.6319  0.2543  0.6122 13.8553 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 1.071    1.035   
##  Cue_renamed (Intercept) 1.548    1.244   
## Number of obs: 9503, groups:  subject, 132; Cue_renamed, 72
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.1510     0.3314  -3.473 0.000514 ***
## theme_ratio_E   2.1304     0.4796   4.442 8.92e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## theme_rat_E -0.851

#NEW, model with bias
mE_bias = glmer(responses_theme ~ theme_ratio_E + Bias_E + (1 | subject) + (1 | Cue_renamed), data = data_model, family="binomial")
plot(fitted(mE_bias), residuals(mE_bias))

summary(mE_bias)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: responses_theme ~ theme_ratio_E + Bias_E + (1 | subject) + (1 |  
##     Cue_renamed)
##    Data: data_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   9999.4  10035.2  -4994.7   9989.4     9498 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7341 -0.6319  0.2543  0.6121 13.8576 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 1.071    1.035   
##  Cue_renamed (Intercept) 1.538    1.240   
## Number of obs: 9503, groups:  subject, 132; Cue_renamed, 72
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.9825     1.3313  -1.489    0.136    
## theme_ratio_E   2.1751     0.4826   4.507 6.58e-06 ***
## Bias_Enone      0.8168     1.2675   0.644    0.519    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) thm__E
## theme_rat_E -0.346       
## Bias_Enone  -0.969  0.141

## English/Vietnamese comparison
mVE = glmer(responses_theme ~ theme_ratio_V + theme_ratio_E + (1 | subject) + (1 | Cue_renamed), data = data_model, family="binomial")
summary(mVE)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: responses_theme ~ theme_ratio_V + theme_ratio_E + (1 | subject) +  
##     (1 | Cue_renamed)
##    Data: data_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   9998.6  10034.4  -4994.3   9988.6     9498 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7480 -0.6313  0.2543  0.6125 13.8545 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  subject     (Intercept) 1.071    1.035   
##  Cue_renamed (Intercept) 1.524    1.235   
## Number of obs: 9503, groups:  subject, 132; Cue_renamed, 72
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.1351     0.3296  -3.444 0.000573 ***
## theme_ratio_V   0.4951     0.4547   1.089 0.276270    
## theme_ratio_E   1.6875     0.6266   2.693 0.007075 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) thm__V
## theme_rat_V  0.044       
## theme_rat_E -0.675 -0.650

Plot Model vs Data

require(ggrepel)

## Loading required package: ggrepel

dataUS <- data %>%
  filter(language == "English")
data_avgUS <- dataUS %>% #compute emp_theme_prop
  #select( -theme_resp_ratio_bysubj) %>%
  group_by(Cue_renamed) %>%
  summarize(emp_theme_prop = mean(responses_theme))

## `summarise()` ungrouping output (override with `.groups` argument)

corrE <- dataUS %>%
  ungroup() %>%
  select(-subject) %>%
  distinct(Cue_renamed, theme_ratio_E) %>%
  left_join(data_avgUS, by="Cue_renamed") %>%
  rename(triad=Cue_renamed) %>%
  rename(Ecorpus_theme_prop=theme_ratio_E)

corrE %>% ggplot(aes(x=emp_theme_prop, y=Ecorpus_theme_prop, label = triad)) + 
  geom_text(size = 3) + 
  #geom_label_repel() +
  theme_classic() +
  geom_abline(slope=1, colour="red", intercept = 0, alpha = 0.8) +
  xlab("Proportion Thematic Chosen (US)") +
  xlim (0, 1) +
  ylab("Proportion Thematic Predicted (English)") + 
  scale_x_continuous(expand = c(0, 0), limits = c(0, 1.05)) + 
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.05))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

cor.test(corrE$Ecorpus_theme_prop,corrE$emp_theme_prop, na.rm=T)

## 
##  Pearson's product-moment correlation
## 
## data:  corrE$Ecorpus_theme_prop and corrE$emp_theme_prop
## t = 4.775, df = 70, p-value = 9.549e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2982524 0.6524220
## sample estimates:
##       cor 
## 0.4956722

corrV <- dataUS %>% 
  ungroup() %>%
  select(-subject) %>%
  distinct(Cue_renamed, theme_ratio_V) %>%
  left_join(data_avgUS, by="Cue_renamed") %>%
  rename(triad=Cue_renamed) %>%
  rename(Vcorpus_theme_prop=theme_ratio_V)

corrV %>% ggplot(aes(x=emp_theme_prop, y=Vcorpus_theme_prop, label = triad)) + 
  geom_text() + 
  #geom_label_repel() 
  geom_abline(slope=1, colour="red", intercept = 0, alpha = 0.8) +
  theme_classic() +
  xlab("Proportion Thematic Chosen (US)") +
  ylab("Proportion Thematic Predicted (Vietnamese)")

cor.test(corrV$Vcorpus_theme_prop,corrV$emp_theme_prop, na.rm=T)

## 
##  Pearson's product-moment correlation
## 
## data:  corrV$Vcorpus_theme_prop and corrV$emp_theme_prop
## t = 3.594, df = 70, p-value = 0.0006017
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1794280 0.5738843
## sample estimates:
##       cor 
## 0.3946901

dataUS %>% ggplot(aes(x=theme_ratio_E, y=theme_ratio_V, label = Cue_renamed)) + 
  geom_text() + 
  #geom_label_repel() +
  theme_classic() +
  xlab("Proportion Thematic Predicted (English)") +
  ylab("Proportion Thematic Predicted (Vietnamese)")

cor.test(corrE$Ecorpus_theme_prop,corrV$Vcorpus_theme_prop, na.rm=T)

## 
##  Pearson's product-moment correlation
## 
## data:  corrE$Ecorpus_theme_prop and corrV$Vcorpus_theme_prop
## t = 7.1387, df = 70, p-value = 6.975e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4912804 0.7656175
## sample estimates:
##      cor 
## 0.649075

VN responses

Regressions

#data_model <- data_responses
data_model <- data %>%
  filter(language == "Vietnamese")

mV = glmer(responses_theme ~ theme_ratio_V + (1 | subject) + (1 | Cue_renamed), data = data_model, family="binomial")
plot(fitted(mV), residuals(mV))

summary(mV)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: responses_theme ~ theme_ratio_V + (1 | subject) + (1 | Cue_renamed)
##    Data: data_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   4214.4   4239.4  -2103.2   4206.4     3859 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2567 -0.6587  0.2701  0.6141  4.5409 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Cue_renamed (Intercept) 1.8235   1.3504  
##  subject     (Intercept) 0.4715   0.6866  
## Number of obs: 3863, groups:  Cue_renamed, 72; subject, 54
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.4826     0.2678  -1.802 0.071608 .  
## theme_ratio_V   1.3750     0.3848   3.573 0.000353 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## theme_rat_V -0.708

#NEW, with bias
mV_bias = glmer(responses_theme ~ theme_ratio_V + Bias_V + (1 | subject) + (1 | Cue_renamed), data = data_model, family="binomial")
plot(fitted(mV_bias), residuals(mV_bias))

summary(mV_bias)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: responses_theme ~ theme_ratio_V + Bias_V + (1 | subject) + (1 |  
##     Cue_renamed)
##    Data: data_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   4217.3   4254.8  -2102.6   4205.3     3857 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.1698 -0.6596  0.2689  0.6152  4.5095 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Cue_renamed (Intercept) 1.7906   1.3381  
##  subject     (Intercept) 0.4714   0.6866  
## Number of obs: 3863, groups:  Cue_renamed, 72; subject, 54
## 
## Fixed effects:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -0.69377    0.36585  -1.896 0.057920 .  
## theme_ratio_V          1.43039    0.39672   3.606 0.000312 ***
## Bias_Vnone             0.32958    0.37563   0.877 0.380261    
## Bias_Vshared_morpheme -0.05723    0.55430  -0.103 0.917769    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) thm__V Bs_Vnn
## theme_rat_V -0.472              
## Bias_Vnone  -0.686 -0.017       
## Bs_Vshrd_mr -0.352 -0.250  0.462

mE = glmer(responses_theme ~ theme_ratio_E + (1 | subject) + (1 | Cue_renamed), data = data_model, family="binomial")
plot(fitted(mE), residuals(mE))

summary(mE)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: responses_theme ~ theme_ratio_E + (1 | subject) + (1 | Cue_renamed)
##    Data: data_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   4213.3   4238.3  -2102.6   4205.3     3859 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.1753 -0.6559  0.2708  0.6150  4.4258 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Cue_renamed (Intercept) 1.7955   1.3400  
##  subject     (Intercept) 0.4714   0.6866  
## Number of obs: 3863, groups:  Cue_renamed, 72; subject, 54
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.9618     0.3615  -2.660 0.007810 ** 
## theme_ratio_E   1.9670     0.5250   3.746 0.000179 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## theme_rat_E -0.854

## English/Vietnamese comparison
mVE = glmer(responses_theme ~ theme_ratio_V + theme_ratio_E + (1 | subject) + (1 | Cue_renamed), data = data_model, family="binomial")
summary(mVE)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: responses_theme ~ theme_ratio_V + theme_ratio_E + (1 | subject) +  
##     (1 | Cue_renamed)
##    Data: data_model
## 
##      AIC      BIC   logLik deviance df.resid 
##   4212.9   4244.2  -2101.5   4202.9     3858 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2272 -0.6561  0.2694  0.6141  4.4519 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Cue_renamed (Intercept) 1.7291   1.3150  
##  subject     (Intercept) 0.4714   0.6866  
## Number of obs: 3863, groups:  Cue_renamed, 72; subject, 54
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    -0.9350     0.3559  -2.627  0.00862 **
## theme_ratio_V   0.7674     0.4937   1.554  0.12009   
## theme_ratio_E   1.2789     0.6784   1.885  0.05941 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) thm__V
## theme_rat_V  0.045       
## theme_rat_E -0.677 -0.650

Plot Model vs Data

require(ggrepel)
dataVN <- data %>%
  filter(language == "Vietnamese")

data_avgV<- dataVN %>% #compute emp_theme_prop
  #select( -theme_resp_ratio_bysubj) %>%
  group_by(Cue_renamed) %>%
  summarize(emp_theme_prop = mean(responses_theme))

## `summarise()` ungrouping output (override with `.groups` argument)

corrE <- data_model %>% 
  ungroup() %>%
  select(-subject) %>%
  distinct(Cue_renamed, theme_ratio_E) %>%
  left_join(data_avgV, by="Cue_renamed") %>%
  rename(triad=Cue_renamed) %>%
  rename(Ecorpus_theme_prop=theme_ratio_E)

corrE %>% ggplot(aes(x=emp_theme_prop, y=Ecorpus_theme_prop, label = triad)) + 
  geom_text() + 
  #geom_label_repel() +
  theme_classic() +
  xlab("Proportion Thematic Chosen (VN)") +
  ylab("Proportion Thematic Predicted (English)")

## Warning: Removed 1 rows containing missing values (geom_text).

cor.test(corrE$Ecorpus_theme_prop,corrE$emp_theme_prop, na.rm=T)

## 
##  Pearson's product-moment correlation
## 
## data:  corrE$Ecorpus_theme_prop and corrE$emp_theme_prop
## t = 3.8511, df = 70, p-value = 0.0002574
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2064557 0.5924095
## sample estimates:
##       cor 
## 0.4181256

corrV <- data_model %>% 
  ungroup() %>%
  select(-subject) %>%
  distinct(Cue_renamed, theme_ratio_V) %>%
  left_join(data_avgV, by="Cue_renamed") %>%
  rename(triad=Cue_renamed) %>%
  rename(Vcorpus_theme_prop=theme_ratio_V)

corrV %>% ggplot(aes(x=emp_theme_prop, y=Vcorpus_theme_prop, label = triad)) + 
  geom_text(size = 3) + 
  #geom_label_repel() +
  geom_abline(slope=1, colour="red", intercept = 0.0, alpha = 0.8) +
  theme_classic() +
  xlab("Proportion Thematic Chosen (VN)") +
  xlim(0, 1) +
  ylab("Proportion Thematic Predicted (Vietnamese)")  + 
  scale_x_continuous(expand = c(0, 0), limits = c(-0.05, 1.05)) + 
  scale_y_continuous(expand = c(0, 0), limits = c(-0.05, 1.05))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 1 rows containing missing values (geom_text).

cor.test(corrV$Vcorpus_theme_prop,corrV$emp_theme_prop, na.rm=T)

## 
##  Pearson's product-moment correlation
## 
## data:  corrV$Vcorpus_theme_prop and corrV$emp_theme_prop
## t = 3.661, df = 70, p-value = 0.0004838
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1865288 0.5787907
## sample estimates:
##       cor 
## 0.4008744

data_model %>% ggplot(aes(x=theme_ratio_E, y=theme_ratio_V, label = Cue_renamed)) + 
  geom_text() + 
  #geom_label_repel() +
  theme_classic() +
  xlab("Proportion Thematic Predicted (English)") +
  ylab("Proportion Thematic Predicted (Vietnamese)")

## Warning: Removed 25 rows containing missing values (geom_text).

cor.test(corrE$Ecorpus_theme_prop,corrV$Vcorpus_theme_prop, na.rm=T)

## 
##  Pearson's product-moment correlation
## 
## data:  corrE$Ecorpus_theme_prop and corrV$Vcorpus_theme_prop
## t = 7.1387, df = 70, p-value = 6.975e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4912804 0.7656175
## sample estimates:
##      cor 
## 0.649075

Where do empirical results and corpus prediction differ?

dataUS_sum <-   dataUS%>% 
  group_by(subject, Cue_renamed, responses_theme) %>%
  summarize(theme_resp_ratio_bysubj = mean(responses_theme))%>%  
  #select( -theme_resp_ratio_bysubj) %>%
  group_by(Cue_renamed) %>%
  summarize(Eemp_theme_prop = mean(theme_resp_ratio_bysubj))

## `summarise()` regrouping output by 'subject', 'Cue_renamed' (override with `.groups` argument)

## `summarise()` ungrouping output (override with `.groups` argument)

dataVN_sum <- dataVN %>% 
  group_by(subject, Cue_renamed, responses_theme) %>%
  summarize(theme_resp_ratio_bysubj = mean(responses_theme))%>%  
  #select( -theme_resp_ratio_bysubj) %>%
  group_by(Cue_renamed) %>%
  summarize(Vemp_theme_prop = mean(theme_resp_ratio_bysubj))

## `summarise()` regrouping output by 'subject', 'Cue_renamed' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)

dataVN_sum <- na.omit(dataVN_sum)

E_V_resp_compare <- full_join(dataUS_sum, dataVN_sum, by = "Cue_renamed")

E_V_corp_compare <- data %>%
  select(Cue_renamed, theme_ratio_E, theme_ratio_V) %>%
  group_by(Cue_renamed) %>%
  summarize(theme_ratio_E = median(theme_ratio_E), 
            theme_ratio_V = median(theme_ratio_V))

## `summarise()` ungrouping output (override with `.groups` argument)

E_V_compare <- left_join(E_V_resp_compare, E_V_corp_compare)

## Joining, by = "Cue_renamed"

E_V_compare <- E_V_compare %>%
  mutate(emp_diff = Eemp_theme_prop - Vemp_theme_prop) %>%
  mutate(corpus_diff = theme_ratio_E - theme_ratio_V)

ggplot(data = E_V_compare, 
       mapping = aes(x = emp_diff, y = corpus_diff, label = Cue_renamed)) +
  geom_text(size = 3) + 
  #geom_label_repel() +
  geom_vline(xintercept = 0) +
  geom_hline(yintercept = 0) +
  theme_classic()