library(dplyr)
library(ggplot2)
library(Hmisc)
library(lme4)
library(afex)
library(knitr)
library(data.table)
library(zoo) #replace missing with col mean
library(lmerTest)

Data Preprocessing

# Load the trial-level data set.
df <- read.csv("revision_3.csv")

# Identifier and design columns are stored as codes; treat them as factors.
cols <- c(
  "participant", "prime", "end", "gender",
  "idiom_lang", "nat_LAN", "phrase_condition", "list"
)
df[cols] <- lapply(df[cols], factor)

# Give the two design factors readable names for plots and tables.
names(df)[names(df) == "idiom_lang"] <- "Idiom_Language"
names(df)[names(df) == "phrase_condition"] <- "Phrase_Type"

# Relabel factor levels (level order is left untouched).
levels(df$Phrase_Type)[levels(df$Phrase_Type) == "Experimental"] <- "Idiomatic"
levels(df$nat_LAN)[levels(df$nat_LAN) == "2"] <- "Mandarin-English Bilinguals"
levels(df$nat_LAN)[levels(df$nat_LAN) == "1"] <- "Native English Monolinguals"

# Impute missing values in the numeric covariates with the column mean
# (zoo::na.aggregate), then make sure each column is stored as numeric.
cols1 <- c("EN_rating", "CN_rating", "freq_HAL", "age")
df[cols1] <- lapply(na.aggregate(df[cols1]), as.numeric)


# Quick look at the first rows after preprocessing.
head(df)

Remove RTs 2SD away from group mean

# Trim outlying reaction times within each language group (nat_LAN):
# keep a trial only if its RT lies within 2 SD of the group's centre.
#
# The previous condition was
#   !(exp_resp.rt - median(exp_resp.rt)) > abs(2*sd(exp_resp.rt))
# where abs() wrapped the (already non-negative) 2*SD instead of the
# deviation, so only RTs more than 2 SD ABOVE the centre were removed and
# arbitrarily fast RTs were always kept. abs() now wraps the deviation so the
# trim is two-sided.
#
# NOTE(review): the centre is the group median, while the section heading
# says "group mean" -- confirm which one is intended.
# NOTE(review): this changes which trials are retained, so the rendered
# output further down has to be regenerated.
df3 <- df %>%
  group_by(nat_LAN) %>%
  filter(
    abs(exp_resp.rt - median(exp_resp.rt, na.rm = TRUE)) <=
      2 * sd(exp_resp.rt, na.rm = TRUE)
  ) %>%
  ungroup() # do not leak the grouping into later steps

# Proportion of data points removed by the trim
1 - nrow(df3) / nrow(df)

## [1] 0.06933198

Visualize RT distribution

# Boxplots of the trimmed RTs: idiom language on the x-axis, coloured by
# phrase type, one panel per language group.
ggplot(
  data = df3,
  mapping = aes(x = Idiom_Language, y = exp_resp.rt, colour = Phrase_Type)
) +
  geom_boxplot() +
  facet_wrap(~nat_LAN) +
  labs(y = "Reaction time")

Dummy code Binary categorical variables

# Work on a copy so that df3 keeps the trimmed data with its original
# variable names and level labels.
df4 <- df3

# Recode each two-level factor to 0/1 labels and rename the column to say
# what "1" means. New labels are assigned in the factor's existing level
# order, so the first level gets the first label.
levels(df4$nat_LAN) <- c(0, 1)
names(df4)[names(df4) == "nat_LAN"] <- "IsBilingual"

levels(df4$Idiom_Language) <- c(1, 0)
names(df4)[names(df4) == "Idiom_Language"] <- "IsChineseIdiom"

levels(df4$Phrase_Type) <- c(0, 1)
names(df4)[names(df4) == "Phrase_Type"] <- "IsExperimental"


# Exclude the trials whose `end` value is "gold" or "water".
df5 <- df4[df4$end != "gold" & df4$end != "water", ]

# Drop columns 17 and 18 by position (described as duplicate columns).
# NOTE(review): positional indices break silently if the CSV layout
# changes -- consider dropping by name.
df5 <- df5[-c(17, 18)]

Mean and sd of every condition

# Descriptives for every cell of the 2 x 2 x 2 design: mean and SD of the
# raw RT and of log(RT). The summary expressions are left unnamed so the
# resulting column names stay as before (e.g. `mean(exp_resp.rt)`).
# `.groups = "drop"` returns an ungrouped table and silences the
# "`summarise()` regrouping output by ..." message.
grouped_mean <- df5 %>%
  group_by(IsExperimental, IsBilingual, IsChineseIdiom) %>%
  summarise(
    mean(exp_resp.rt),
    sd(exp_resp.rt),
    mean(log(exp_resp.rt)),
    sd(log(exp_resp.rt)),
    .groups = "drop"
  )

kable(grouped_mean)

IsExperimental	IsBilingual	IsChineseIdiom	mean(exp_resp.rt)	sd(exp_resp.rt)	mean(log(exp_resp.rt))	sd(log(exp_resp.rt))
0	0	1	636.6360	126.6395	6.436875	0.1962024
0	0	0	645.6663	127.1494	6.450971	0.1973788
0	1	1	728.0846	169.0169	6.564474	0.2267021
0	1	0	729.2199	172.1004	6.564876	0.2322806
1	0	1	610.0505	122.4104	6.394016	0.1970651
1	0	0	576.4549	138.2126	6.328585	0.2384026
1	1	1	702.3808	160.9192	6.529157	0.2240436
1	1	0	699.2999	162.8246	6.523844	0.2284220

Omnibus test

# Omnibus model on log RT: full three-way interaction of phrase type, idiom
# language and language group, plus item covariates. Random effects:
# by-participant intercept and slopes for phrase type and idiom language,
# and a by-item (`end`) intercept. Fitted by maximum likelihood
# (REML = FALSE) with the bobyqa optimizer.
m <- lmer(
  log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom * IsBilingual +
    length + log(freq_HAL) +
    semantic_relatedness + literality +
    (1 + IsExperimental + IsChineseIdiom | participant) + (1 | end),
  data = df5,
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)

summary(m)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom * IsBilingual +  
##     length + log(freq_HAL) + semantic_relatedness + literality +  
##     (1 + IsExperimental + IsChineseIdiom | participant) + (1 |      end)
##    Data: df5
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    -4428    -4286     2234    -4468     8940 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9159 -0.6725 -0.0724  0.6181  3.7056 
## 
## Random effects:
##  Groups      Name            Variance  Std.Dev. Corr       
##  end         (Intercept)     0.0025510 0.05051             
##  participant (Intercept)     0.0116526 0.10795             
##              IsExperimental1 0.0008852 0.02975   0.28      
##              IsChineseIdiom0 0.0003493 0.01869  -0.25  0.10
##  Residual                    0.0327306 0.18092             
## Number of obs: 8960, groups:  end, 170; participant, 122
## 
## Fixed effects:
##                                                Estimate Std. Error
## (Intercept)                                   6.674e+00  5.043e-02
## IsExperimental1                              -2.958e-02  1.443e-02
## IsChineseIdiom0                               1.250e-02  1.374e-02
## IsBilingual1                                  1.302e-01  2.111e-02
## length                                        1.439e-02  3.840e-03
## log(freq_HAL)                                -2.136e-02  3.294e-03
## semantic_relatedness                         -3.631e-02  2.634e-02
## literality                                   -2.787e-02  5.822e-03
## IsExperimental1:IsChineseIdiom0              -7.285e-02  1.898e-02
## IsExperimental1:IsBilingual1                  8.673e-03  1.231e-02
## IsChineseIdiom0:IsBilingual1                 -1.668e-02  1.163e-02
## IsExperimental1:IsChineseIdiom0:IsBilingual1  7.793e-02  1.543e-02
##                                                      df t value Pr(>|t|)
## (Intercept)                                   1.929e+02 132.347  < 2e-16
## IsExperimental1                               2.240e+02  -2.049 0.041617
## IsChineseIdiom0                               2.235e+02   0.910 0.363607
## IsBilingual1                                  1.252e+02   6.168 8.81e-09
## length                                        1.614e+02   3.746 0.000250
## log(freq_HAL)                                 1.585e+02  -6.487 1.06e-09
## semantic_relatedness                          1.829e+02  -1.379 0.169698
## literality                                    1.770e+02  -4.787 3.56e-06
## IsExperimental1:IsChineseIdiom0               2.160e+02  -3.839 0.000162
## IsExperimental1:IsBilingual1                  3.283e+02   0.704 0.481645
## IsChineseIdiom0:IsBilingual1                  4.244e+02  -1.434 0.152270
## IsExperimental1:IsChineseIdiom0:IsBilingual1  8.532e+03   5.051 4.48e-07
##                                                 
## (Intercept)                                  ***
## IsExperimental1                              *  
## IsChineseIdiom0                                 
## IsBilingual1                                 ***
## length                                       ***
## log(freq_HAL)                                ***
## semantic_relatedness                            
## literality                                   ***
## IsExperimental1:IsChineseIdiom0              ***
## IsExperimental1:IsBilingual1                    
## IsChineseIdiom0:IsBilingual1                    
## IsExperimental1:IsChineseIdiom0:IsBilingual1 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 IsChI0 IsBln1 length l(_HAL smntc_ ltrlty
## IsExprmntl1 -0.016                                                 
## IsChinsIdm0 -0.244  0.455                                          
## IsBilingul1 -0.202  0.051  0.130                                   
## length      -0.624 -0.043  0.102  0.003                            
## lg(frq_HAL) -0.793 -0.012  0.070 -0.002  0.271                     
## smntc_rltdn -0.031  0.016  0.090  0.002  0.032 -0.089              
## literality  -0.449 -0.231  0.021  0.000  0.214  0.153 -0.169       
## IsExp1:ICI0  0.157 -0.646 -0.698 -0.074 -0.039 -0.025 -0.060 -0.084
## IsExpr1:IB1  0.029 -0.401 -0.179 -0.130 -0.006 -0.002  0.000 -0.001
## IsChnI0:IB1  0.062 -0.181 -0.407 -0.323 -0.005  0.004 -0.001  0.004
## IE1:ICI0:IB -0.038  0.257  0.280  0.193  0.006 -0.003  0.000 -0.002
##             IsE1:ICI0 IE1:IB ICI0:I
## IsExprmntl1                        
## IsChinsIdm0                        
## IsBilingul1                        
## length                             
## lg(frq_HAL)                        
## smntc_rltdn                        
## literality                         
## IsExp1:ICI0                        
## IsExpr1:IB1  0.245                 
## IsChnI0:IB1  0.269     0.450       
## IE1:ICI0:IB -0.385    -0.644 -0.690

# Reduced omnibus model: let lmerTest's step() simplify `m`, then pull the
# selected model out of the step result.
step_result <- lmerTest::step(m)
final_m <- lmerTest::get_model(step_result)


summary(final_m)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + IsChineseIdiom + IsBilingual +  
##     length + log(freq_HAL) + literality + (1 | end) + (IsExperimental |  
##     participant) + IsExperimental:IsChineseIdiom + IsExperimental:IsBilingual +  
##     IsChineseIdiom:IsBilingual + IsExperimental:IsChineseIdiom:IsBilingual
##    Data: df5
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -4431.6  -4318.0   2231.8  -4463.6     8944 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9028 -0.6724 -0.0691  0.6172  3.6916 
## 
## Random effects:
##  Groups      Name            Variance Std.Dev. Corr
##  end         (Intercept)     0.002529 0.05029      
##  participant (Intercept)     0.011218 0.10592      
##              IsExperimental1 0.000881 0.02968  0.29
##  Residual                    0.032832 0.18120      
## Number of obs: 8960, groups:  end, 170; participant, 122
## 
## Fixed effects:
##                                                Estimate Std. Error
## (Intercept)                                   6.671e+00  5.019e-02
## IsExperimental1                              -2.938e-02  1.439e-02
## IsChineseIdiom0                               1.446e-02  1.344e-02
## IsBilingual1                                  1.301e-01  2.077e-02
## length                                        1.457e-02  3.826e-03
## log(freq_HAL)                                -2.175e-02  3.270e-03
## literality                                   -2.919e-02  5.722e-03
## IsExperimental1:IsChineseIdiom0              -7.442e-02  1.889e-02
## IsExperimental1:IsBilingual1                  8.926e-03  1.231e-02
## IsChineseIdiom0:IsBilingual1                 -1.661e-02  1.113e-02
## IsExperimental1:IsChineseIdiom0:IsBilingual1  7.767e-02  1.545e-02
##                                                      df t value Pr(>|t|)
## (Intercept)                                   1.954e+02 132.917  < 2e-16
## IsExperimental1                               2.282e+02  -2.041 0.042364
## IsChineseIdiom0                               2.285e+02   1.076 0.282937
## IsBilingual1                                  1.370e+02   6.265 4.51e-09
## length                                        1.641e+02   3.809 0.000197
## log(freq_HAL)                                 1.616e+02  -6.651 4.27e-10
## literality                                    1.876e+02  -5.102 8.21e-07
## IsExperimental1:IsChineseIdiom0               2.205e+02  -3.939 0.000110
## IsExperimental1:IsBilingual1                  3.300e+02   0.725 0.469052
## IsChineseIdiom0:IsBilingual1                  8.609e+03  -1.492 0.135818
## IsExperimental1:IsChineseIdiom0:IsBilingual1  8.605e+03   5.028 5.06e-07
##                                                 
## (Intercept)                                  ***
## IsExperimental1                              *  
## IsChineseIdiom0                                 
## IsBilingual1                                 ***
## length                                       ***
## log(freq_HAL)                                ***
## literality                                   ***
## IsExperimental1:IsChineseIdiom0              ***
## IsExperimental1:IsBilingual1                    
## IsChineseIdiom0:IsBilingual1                    
## IsExperimental1:IsChineseIdiom0:IsBilingual1 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 IsChI0 IsBln1 length l(_HAL ltrlty IsE1:ICI0
## IsExprmntl1 -0.016                                                    
## IsChinsIdm0 -0.235  0.458                                             
## IsBilingul1 -0.199  0.051  0.106                                      
## length      -0.625 -0.044  0.100  0.003                               
## lg(frq_HAL) -0.800 -0.010  0.080 -0.002  0.275                        
## literality  -0.462 -0.232  0.038  0.000  0.223  0.141                 
## IsExp1:ICI0  0.156 -0.647 -0.708 -0.075 -0.037 -0.030 -0.096          
## IsExpr1:IB1  0.029 -0.402 -0.178 -0.131 -0.006 -0.002 -0.001  0.246   
## IsChnI0:IB1  0.051 -0.184 -0.398 -0.271 -0.005  0.004  0.004  0.283   
## IE1:ICI0:IB -0.038  0.258  0.287  0.196  0.006 -0.003 -0.002 -0.387   
##             IE1:IB ICI0:I
## IsExprmntl1              
## IsChinsIdm0              
## IsBilingul1              
## length                   
## lg(frq_HAL)              
## literality               
## IsExp1:ICI0              
## IsExpr1:IB1              
## IsChnI0:IB1  0.457       
## IE1:ICI0:IB -0.644 -0.721

The three-way interaction is significant.

Get native English speaker model

# Full model for the native English group only (rows with IsBilingual == "0"):
# phrase type x idiom language, item covariates including the English
# rating, and the same random-effects structure as the omnibus model.
m_e <- lmer(
  log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom +
    length + EN_rating + log(freq_HAL) + semantic_relatedness + literality +
    (1 + IsExperimental + IsChineseIdiom | participant) + (1 | end),
  data = df5[df5$IsBilingual == "0", ],
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)

summary(m_e)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom + length +  
##     EN_rating + log(freq_HAL) + semantic_relatedness + literality +  
##     (1 + IsExperimental + IsChineseIdiom | participant) + (1 |      end)
##    Data: df5[df5$IsBilingual == "0", ]
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2677.3  -2567.6   1355.7  -2711.3     4670 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.2931 -0.6424 -0.0489  0.6073  3.6620 
## 
## Random effects:
##  Groups      Name            Variance  Std.Dev. Corr       
##  end         (Intercept)     0.0024933 0.04993             
##  participant (Intercept)     0.0104809 0.10238             
##              IsExperimental1 0.0013086 0.03618   0.49      
##              IsChineseIdiom0 0.0006344 0.02519  -0.32  0.07
##  Residual                    0.0297639 0.17252             
## Number of obs: 4687, groups:  end, 170; participant, 63
## 
## Fixed effects:
##                                   Estimate Std. Error         df t value
## (Intercept)                       6.664030   0.053826 178.738651 123.808
## IsExperimental1                  -0.024858   0.014466 163.639745  -1.718
## IsChineseIdiom0                   0.108096   0.052237 157.576153   2.069
## length                            0.010737   0.004214 158.964277   2.548
## EN_rating                        -0.022358   0.011737 155.916628  -1.905
## log(freq_HAL)                    -0.014498   0.003535 155.725206  -4.101
## semantic_relatedness             -0.010441   0.028423 164.847234  -0.367
## literality                       -0.034564   0.006277 167.482958  -5.506
## IsExperimental1:IsChineseIdiom0  -0.071035   0.018602 153.249022  -3.819
##                                 Pr(>|t|)    
## (Intercept)                      < 2e-16 ***
## IsExperimental1                 0.087620 .  
## IsChineseIdiom0                 0.040147 *  
## length                          0.011778 *  
## EN_rating                       0.058628 .  
## log(freq_HAL)                   6.61e-05 ***
## semantic_relatedness            0.713838    
## literality                      1.36e-07 ***
## IsExperimental1:IsChineseIdiom0 0.000194 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 IsChI0 length EN_rtn l(_HAL smntc_ ltrlty
## IsExprmntl1  0.023                                                 
## IsChinsIdm0  0.048  0.108                                          
## length      -0.589 -0.048  0.249                                   
## EN_rating   -0.115  0.006 -0.965 -0.230                            
## lg(frq_HAL) -0.771 -0.014  0.147  0.298 -0.133                     
## smntc_rltdn -0.038  0.009  0.028  0.035 -0.003 -0.088              
## literality  -0.455 -0.248  0.044  0.227 -0.040  0.159 -0.148       
## IsExp1:ICI0  0.153 -0.626 -0.181 -0.041  0.001 -0.024 -0.067 -0.091

# Reduced native-English model: simplify `m_e` with lmerTest's step() and
# extract the selected model. `step_result` is reused from the omnibus step.
step_result <- lmerTest::step(m_e)
final_model <- lmerTest::get_model(step_result)

# Compact print of the selected model.
final_model

## Linear mixed model fit by maximum likelihood  ['lmerModLmerTest']
## Formula: log(exp_resp.rt) ~ IsExperimental + IsChineseIdiom + length +  
##     log(freq_HAL) + literality + (1 | end) + (IsExperimental |  
##     participant) + IsExperimental:IsChineseIdiom
##    Data: df5[df5$IsBilingual == "0", ]
##       AIC       BIC    logLik  deviance  df.resid 
## -2679.102 -2601.671  1351.551 -2703.102      4675 
## Random effects:
##  Groups      Name            Std.Dev. Corr
##  end         (Intercept)     0.05059      
##  participant (Intercept)     0.09894      
##              IsExperimental1 0.03598  0.51
##  Residual                    0.17301      
## Number of obs: 4687, groups:  end, 170; participant, 63
## Fixed Effects:
##                     (Intercept)                  IsExperimental1  
##                        6.651255                        -0.024785  
##                 IsChineseIdiom0                           length  
##                        0.012931                         0.008993  
##                   log(freq_HAL)                       literality  
##                       -0.015520                        -0.035409  
## IsExperimental1:IsChineseIdiom0  
##                       -0.071482

# Full summary (with coefficient tests) of the reduced native-English model.
summary(final_model)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: log(exp_resp.rt) ~ IsExperimental + IsChineseIdiom + length +  
##     log(freq_HAL) + literality + (1 | end) + (IsExperimental |  
##     participant) + IsExperimental:IsChineseIdiom
##    Data: df5[df5$IsBilingual == "0", ]
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2679.1  -2601.7   1351.6  -2703.1     4675 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.2772 -0.6462 -0.0418  0.6061  3.6389 
## 
## Random effects:
##  Groups      Name            Variance Std.Dev. Corr
##  end         (Intercept)     0.002559 0.05059      
##  participant (Intercept)     0.009790 0.09894      
##              IsExperimental1 0.001295 0.03598  0.51
##  Residual                    0.029933 0.17301      
## Number of obs: 4687, groups:  end, 170; participant, 63
## 
## Fixed effects:
##                                   Estimate Std. Error         df t value
## (Intercept)                       6.651255   0.053813 181.119708 123.599
## IsExperimental1                  -0.024785   0.014584 165.909043  -1.699
## IsChineseIdiom0                   0.012931   0.013329 160.913824   0.970
## length                            0.008993   0.004138 161.412590   2.173
## log(freq_HAL)                    -0.015520   0.003524 157.912284  -4.404
## literality                       -0.035409   0.006262 173.562725  -5.655
## IsExperimental1:IsChineseIdiom0  -0.071482   0.018743 155.646622  -3.814
##                                 Pr(>|t|)    
## (Intercept)                      < 2e-16 ***
## IsExperimental1                 0.091104 .  
## IsChineseIdiom0                 0.333425    
## length                          0.031231 *  
## log(freq_HAL)                   1.95e-05 ***
## literality                      6.31e-08 ***
## IsExperimental1:IsChineseIdiom0 0.000197 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 IsChI0 length l(_HAL ltrlty
## IsExprmntl1  0.024                                   
## IsChinsIdm0 -0.230  0.446                            
## length      -0.638 -0.048  0.107                     
## lg(frq_HAL) -0.809 -0.012  0.083  0.281              
## literality  -0.475 -0.250  0.037  0.232  0.144       
## IsExp1:ICI0  0.152 -0.628 -0.707 -0.039 -0.030 -0.103

There is no significant main effect of phrase type or phrase language, but their interaction is significant.

Look at Chinese phrases and English phrases separately

# Chinese phrases, native English group
# (rows with IsChineseIdiom == "1" and IsBilingual == "0").
# Random effects: by-participant intercept and phrase-type slope,
# by-item (`end`) intercept.
m_ec <- lmer(
  log(exp_resp.rt) ~ IsExperimental + literality +
    length + EN_rating + log(freq_HAL) +
    (IsExperimental | participant) + (1 | end),
  data = subset(df5, df5$IsChineseIdiom == "1" & df5$IsBilingual == "0"),
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
summary(m_ec)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + literality + length + EN_rating +  
##     log(freq_HAL) + (IsExperimental | participant) + (1 | end)
##    Data: subset(df5, df5$IsChineseIdiom == "1" & df5$IsBilingual == "0")
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -1574.7  -1511.6    798.4  -1596.7     2293 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.5181 -0.6553 -0.0768  0.5951  3.1674 
## 
## Random effects:
##  Groups      Name            Variance  Std.Dev. Corr
##  end         (Intercept)     0.0020849 0.04566      
##  participant (Intercept)     0.0110869 0.10529      
##              IsExperimental1 0.0003273 0.01809  0.40
##  Residual                    0.0259327 0.16104      
## Number of obs: 2304, groups:  end, 82; participant, 63
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      6.643222   0.070273 79.495093  94.534  < 2e-16 ***
## IsExperimental1 -0.032935   0.013481 69.821193  -2.443 0.017097 *  
## literality      -0.021926   0.008942 77.880441  -2.452 0.016447 *  
## length           0.005402   0.005438 72.937557   0.993 0.323804    
## EN_rating        0.002169   0.016501 72.786692   0.131 0.895769    
## log(freq_HAL)   -0.016666   0.004439 72.560580  -3.755 0.000347 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 ltrlty length EN_rtn
## IsExprmntl1  0.103                            
## literality  -0.492 -0.391                     
## length      -0.506 -0.037  0.129              
## EN_rating   -0.334 -0.084  0.210 -0.223       
## lg(frq_HAL) -0.768 -0.002  0.087  0.238  0.039

Phrase type is now significant; frequency and literality are also significant.

# English phrases, native English group
# (rows with IsChineseIdiom == "0" and IsBilingual == "0").
# Same fixed and random structure as the Chinese-phrase model for this group.
m_ee <- lmer(
  log(exp_resp.rt) ~ IsExperimental + literality +
    length + EN_rating + log(freq_HAL) +
    (IsExperimental | participant) + (1 | end),
  data = subset(df5, df5$IsChineseIdiom == "0" & df5$IsBilingual == "0"),
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
summary(m_ee)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + literality + length + EN_rating +  
##     log(freq_HAL) + (IsExperimental | participant) + (1 | end)
##    Data: subset(df5, df5$IsChineseIdiom == "0" & df5$IsBilingual == "0")
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -1037.5   -974.0    529.8  -1059.5     2372 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7769 -0.6535 -0.0501  0.6372  3.4075 
## 
## Random effects:
##  Groups      Name            Variance Std.Dev. Corr
##  end         (Intercept)     0.002325 0.04822      
##  participant (Intercept)     0.008710 0.09333      
##              IsExperimental1 0.003797 0.06162  0.32
##  Residual                    0.033207 0.18223      
## Number of obs: 2383, groups:  end, 88; participant, 63
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      6.802166   0.098005 82.703538  69.406  < 2e-16 ***
## IsExperimental1 -0.091286   0.016785 97.600697  -5.439 3.97e-07 ***
## literality      -0.038177   0.008566 88.389842  -4.457 2.43e-05 ***
## length           0.013936   0.006070 82.012900   2.296   0.0242 *  
## EN_rating       -0.036472   0.016703 79.431723  -2.184   0.0319 *  
## log(freq_HAL)   -0.010435   0.005341 79.628824  -1.954   0.0542 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 ltrlty length EN_rtn
## IsExprmntl1  0.068                            
## literality  -0.210 -0.449                     
## length      -0.288 -0.153  0.320              
## EN_rating   -0.657  0.105 -0.257 -0.280       
## lg(frq_HAL) -0.396 -0.114  0.275  0.366 -0.318

Phrase type is still significant. Familiarity rating, literality, and word length are significant. Frequency is only marginally significant, suggesting that English participants’ RTs were not reliably affected by word frequency.

Get bilingual participants model

# Full model for the bilingual group only (rows with IsBilingual == "1"):
# phrase type x idiom language, item covariates including the Chinese
# rating, plus CAN_year. Random effects: by-participant intercept and slopes
# for phrase type and idiom language, by-item (`end`) intercept.
m_c <- lmer(
  log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom + length + CN_rating +
    literality + semantic_relatedness + log(freq_HAL) + CAN_year +
    (IsExperimental + IsChineseIdiom | participant) + (1 | end),
  data = df5[df5$IsBilingual == "1", ],
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)

summary(m_c)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom + length +  
##     CN_rating + literality + semantic_relatedness + log(freq_HAL) +  
##     CAN_year + (IsExperimental + IsChineseIdiom | participant) +  
##     (1 | end)
##    Data: df5[df5$IsBilingual == "1", ]
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -1800.7  -1686.2    918.4  -1836.7     4255 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.7175 -0.6747 -0.1050  0.6127  3.5911 
## 
## Random effects:
##  Groups      Name            Variance  Std.Dev. Corr       
##  end         (Intercept)     0.0034385 0.05864             
##  participant (Intercept)     0.0117841 0.10855             
##              IsExperimental1 0.0003869 0.01967   0.05      
##              IsChineseIdiom0 0.0002289 0.01513   0.01 -0.08
##  Residual                    0.0345525 0.18588             
## Number of obs: 4273, groups:  end, 170; participant, 59
## 
## Fixed effects:
##                                   Estimate Std. Error         df t value
## (Intercept)                      6.883e+00  7.601e-02  2.339e+02  90.553
## IsExperimental1                 -2.832e-02  1.627e-02  1.574e+02  -1.741
## IsChineseIdiom0                 -4.853e-03  1.970e-02  1.732e+02  -0.246
## length                           2.192e-02  4.759e-03  1.649e+02   4.605
## CN_rating                       -6.279e-04  6.338e-03  1.990e+02  -0.099
## literality                      -1.967e-02  7.238e-03  1.727e+02  -2.717
## semantic_relatedness            -4.718e-02  3.279e-02  1.740e+02  -1.439
## log(freq_HAL)                   -2.788e-02  4.086e-03  1.622e+02  -6.822
## CAN_year                        -1.255e-02  6.717e-03  5.882e+01  -1.868
## IsExperimental1:IsChineseIdiom0  6.594e-03  2.167e-02  1.590e+02   0.304
##                                 Pr(>|t|)    
## (Intercept)                      < 2e-16 ***
## IsExperimental1                  0.08371 .  
## IsChineseIdiom0                  0.80566    
## length                          8.21e-06 ***
## CN_rating                        0.92118    
## literality                       0.00725 ** 
## semantic_relatedness             0.15208    
## log(freq_HAL)                   1.69e-10 ***
## CAN_year                         0.06671 .  
## IsExperimental1:IsChineseIdiom0  0.76126    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 IsChI0 length CN_rtn ltrlty smntc_ l(_HAL CAN_yr
## IsExprmntl1 -0.010                                                        
## IsChinsIdm0 -0.376  0.363                                                 
## length      -0.496 -0.056  0.062                                          
## CN_rating   -0.362  0.005  0.617 -0.031                                   
## literality  -0.372 -0.259  0.023  0.211  0.001                            
## smntc_rltdn -0.054  0.011  0.111  0.036  0.056 -0.152                     
## lg(frq_HAL) -0.664 -0.018  0.082  0.261  0.030  0.158 -0.084              
## CAN_year    -0.459  0.005  0.002  0.002  0.000 -0.005  0.004  0.002       
## IsExp1:ICI0  0.135 -0.655 -0.563 -0.032 -0.020 -0.095 -0.067 -0.029 -0.003

# Reduced bilingual model: simplify `m_c` with lmerTest's step() and extract
# the selected model.
step_resultc <- lmerTest::step(m_c)
final_model_c <- lmerTest::get_model(step_resultc)

# final_model_c  (compact print left disabled)

summary(final_model_c)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + length + literality + log(freq_HAL) +  
##     (1 | end) + (1 | participant)
##    Data: df5[df5$IsBilingual == "1", ]
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -1813.8  -1762.9    914.9  -1829.8     4265 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8145 -0.6780 -0.1049  0.6126  3.5662 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  end         (Intercept) 0.003436 0.05862 
##  participant (Intercept) 0.012783 0.11306 
##  Residual                0.034726 0.18635 
## Number of obs: 4273, groups:  end, 170; participant, 59
## 
## Fixed effects:
##                   Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)       6.810274   0.059998 189.958830 113.509  < 2e-16 ***
## IsExperimental1  -0.026235   0.012018 164.367855  -2.183  0.03045 *  
## length            0.022136   0.004721 166.853395   4.689 5.69e-06 ***
## literality       -0.021056   0.007107 179.100732  -2.963  0.00346 ** 
## log(freq_HAL)    -0.028345   0.004050 164.419871  -6.999 6.23e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 length ltrlty
## IsExprmntl1  0.129                     
## length      -0.619 -0.105              
## literality  -0.479 -0.449  0.224       
## lg(frq_HAL) -0.814 -0.055  0.257  0.149

Main effect of phrase type, no significant interaction effect.

Look at Chinese phrases and English phrases separately

# Chinese phrases, bilingual group
# (rows with IsChineseIdiom == "1" and IsBilingual == "1").
# Random intercepts only, for participant and item (`end`).
m_cc <- lmer(
  log(exp_resp.rt) ~ IsExperimental + literality +
    length + log(freq_HAL) +
    (1 | participant) + (1 | end),
  data = subset(df5, df5$IsChineseIdiom == "1" & df5$IsBilingual == "1"),
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
summary(m_cc)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + literality + length + log(freq_HAL) +  
##     (1 | participant) + (1 | end)
##    Data: subset(df5, df5$IsChineseIdiom == "1" & df5$IsBilingual == "1")
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   -850.8   -805.8    433.4   -866.8     2052 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5256 -0.6896 -0.0888  0.6108  3.5743 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  end         (Intercept) 0.00355  0.05958 
##  participant (Intercept) 0.01287  0.11344 
##  Residual                0.03392  0.18417 
## Number of obs: 2060, groups:  end, 82; participant, 59
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      6.744956   0.084201 85.557403  80.105  < 2e-16 ***
## IsExperimental1 -0.036412   0.017008 79.670097  -2.141  0.03535 *  
## literality      -0.006694   0.011123 85.604300  -0.602  0.54887    
## length           0.021793   0.006804 80.389343   3.203  0.00195 ** 
## log(freq_HAL)   -0.025221   0.005708 79.082626  -4.419 3.12e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 ltrlty length
## IsExprmntl1  0.076                     
## literality  -0.459 -0.391              
## length      -0.621 -0.068  0.176       
## lg(frq_HAL) -0.807 -0.006  0.085  0.237

Phrase type is significant; frequency and length are also significant.

# English phrases, bilingual group
# (rows with IsChineseIdiom == "0" and IsBilingual == "1").
# Random intercepts only, for participant and item (`end`).
m_ce <- lmer(
  log(exp_resp.rt) ~ IsExperimental + literality +
    length + log(freq_HAL) +
    (1 | participant) + (1 | end),
  data = subset(df5, df5$IsChineseIdiom == "0" & df5$IsBilingual == "1"),
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa")
)
summary(m_ce)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + literality + length + log(freq_HAL) +  
##     (1 | participant) + (1 | end)
##    Data: subset(df5, df5$IsChineseIdiom == "0" & df5$IsBilingual == "1")
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   -842.8   -797.2    429.4   -858.8     2205 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8511 -0.6636 -0.0927  0.5863  3.2372 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  end         (Intercept) 0.003246 0.05698 
##  participant (Intercept) 0.012944 0.11377 
##  Residual                0.035314 0.18792 
## Number of obs: 2213, groups:  end, 88; participant, 59
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      6.881275   0.083735 92.668491  82.179  < 2e-16 ***
## IsExperimental1 -0.012823   0.016944 83.230434  -0.757 0.451307    
## literality      -0.032559   0.009320 91.688111  -3.493 0.000736 ***
## length           0.021850   0.006597 83.931689   3.312 0.001367 ** 
## log(freq_HAL)   -0.032659   0.005780 83.182472  -5.650 2.19e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 ltrlty length
## IsExprmntl1  0.188                     
## literality  -0.510 -0.503              
## length      -0.644 -0.141  0.259       
## lg(frq_HAL) -0.846 -0.109  0.205  0.297

Phrase type is no longer significant; frequency, literality, and length are significant.

Bilingual Idiom Final Analysis

Data Preprocessing

Remove RTs 2SD away from group mean

Visualize RT distribution

Dummy code Binary categorical variables

Mean and sd of every condition

Omnibus test

Get native English speaker model

Look at Chinese phrases and English phrases separately

Get bilingual participants model

Look at Chinese phrases and English phrases separately