library(dplyr)
library(ggplot2)
library(Hmisc)
library(lme4)
library(afex)
library(knitr)
library(data.table)
library(zoo) #replace missing with col mean
library(lmerTest)

Data Preprocessing

# Load the trial-level data set.
df <- read.csv("revision_3.csv")

# Convert identifier / design columns (read in as numbers or strings)
# to factors so they are treated as categorical downstream.
cols <- c(
  "participant", "prime", "end", "gender",
  "idiom_lang", "nat_LAN", "phrase_condition", "list"
)
df[cols] <- lapply(df[cols], factor)

# Rename the two design columns that are relabelled for plots and tables.
colnames(df)[colnames(df) == "idiom_lang"] <- "Idiom_Language"
colnames(df)[colnames(df) == "phrase_condition"] <- "Phrase_Type"

# Relabel individual factor levels in place; the order of the levels is
# left unchanged, only their labels are replaced.
levels(df$Phrase_Type)[levels(df$Phrase_Type) == "Experimental"] <- "Idiomatic"
levels(df$nat_LAN)[levels(df$nat_LAN) == "2"] <- "Mandarin-English Bilinguals"
levels(df$nat_LAN)[levels(df$nat_LAN) == "1"] <- "Native English Monolinguals"

# Replace missing numerical values with the column mean
# (zoo::na.aggregate, default FUN = mean), then make sure each column is
# plain numeric. lapply() is used instead of sapply() so the result is
# always a list of columns, whatever the number of rows.
cols1 <- c("EN_rating", "CN_rating", "freq_HAL", "age")
df[cols1] <- lapply(na.aggregate(df[cols1]), as.numeric)


head(df)

Remove RTs more than 2 SD above the group median

# Trim slow responses separately within each participant group (nat_LAN):
# a trial is kept only if its RT is at most 2 SD above the group median.
# NOTE(review): this cutoff is median-based and one-sided (upper tail only),
# exactly as the original condition `!(x - median(x)) > abs(2 * sd(x))`
# evaluated, because `!` binds more loosely than `>` and abs() wrapped the
# (already non-negative) 2 * sd term. If a symmetric cutoff around the mean
# is intended, use `abs(x - mean(x)) <= 2 * sd(x)` instead -- note that this
# would change every downstream result.
df3 <- df %>%
  group_by(nat_LAN) %>%
  filter(exp_resp.rt - median(exp_resp.rt) <= 2 * sd(exp_resp.rt)) %>%
  ungroup() # do not leak the grouping into later steps

# proportion of data points removed
1 - nrow(df3) / nrow(df)
## [1] 0.06933198

Visualize RT distribution

# Boxplots of trimmed RTs by idiom language, coloured by phrase type,
# with one panel per participant group.
ggplot(
  df3,
  aes(x = Idiom_Language, y = exp_resp.rt, colour = Phrase_Type)
) +
  geom_boxplot() +
  facet_wrap(~nat_LAN) +
  labs(y = "Reaction time")

Dummy-code binary categorical variables

# Preserve df3 as the trimmed data set with the original variable levels;
# all recoding below is done on a copy.
df4 <- df3

# Recode each two-level factor to 0/1 labels and rename the column to match.
# `levels<-` assigns labels by position, so the mapping depends on the
# current order of the levels.
# NOTE(review): assumes each of these factors has exactly two levels in the
# expected order -- confirm with levels() if the input data changes.
levels(df4$nat_LAN) <- c(0, 1)
colnames(df4)[colnames(df4) == "nat_LAN"] <- "IsBilingual"

# The first level is labelled 1 here, so "1" becomes the reference level and
# model terms for this factor are reported as `IsChineseIdiom0`.
levels(df4$Idiom_Language) <- c(1, 0)
colnames(df4)[colnames(df4) == "Idiom_Language"] <- "IsChineseIdiom"

levels(df4$Phrase_Type) <- c(0, 1)
colnames(df4)[colnames(df4) == "Phrase_Type"] <- "IsExperimental"


# Drop the trials whose `end` is "gold" or "water". %in% never yields NA,
# so rows with a missing `end` are kept as they are instead of being turned
# into all-NA rows (which `==` indexing would do).
df5 <- df4[!(df4$end %in% c("gold", "water")), ]

# Remove duplicate columns.
# NOTE(review): positional -- assumes columns 17 and 18 are the duplicates;
# verify if the column layout of the CSV changes.
df5 <- df5[-c(17, 18)]

Mean and sd of every condition

# Descriptive statistics per design cell: mean and SD of the raw RTs and of
# the log-transformed RTs.
grouped_mean <- summarise(
  group_by(df5, IsExperimental, IsBilingual, IsChineseIdiom),
  mean(exp_resp.rt),
  sd(exp_resp.rt),
  mean(log(exp_resp.rt)),
  sd(log(exp_resp.rt))
)
## `summarise()` regrouping output by 'IsExperimental', 'IsBilingual' (override with `.groups` argument)
# Render the summary as a table.
kable(grouped_mean)
IsExperimental IsBilingual IsChineseIdiom mean(exp_resp.rt) sd(exp_resp.rt) mean(log(exp_resp.rt)) sd(log(exp_resp.rt))
0 0 1 636.6360 126.6395 6.436875 0.1962024
0 0 0 645.6663 127.1494 6.450971 0.1973788
0 1 1 728.0846 169.0169 6.564474 0.2267021
0 1 0 729.2199 172.1004 6.564876 0.2322806
1 0 1 610.0505 122.4104 6.394016 0.1970651
1 0 0 576.4549 138.2126 6.328585 0.2384026
1 1 1 702.3808 160.9192 6.529157 0.2240436
1 1 0 699.2999 162.8246 6.523844 0.2284220

Omnibus test

# Omnibus model on log RT: full IsExperimental x IsChineseIdiom x IsBilingual
# factorial plus the covariates length, log(freq_HAL), semantic_relatedness
# and literality. Random effects: per-participant intercept with slopes for
# IsExperimental and IsChineseIdiom, and an intercept per `end`.
# Fitted by maximum likelihood (REML = FALSE) with the bobyqa optimizer.
m <- lmer(
  log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom * IsBilingual +
    length + log(freq_HAL) +
    semantic_relatedness + literality +
    (1 + IsExperimental + IsChineseIdiom | participant) + (1 | end),
  data = df5,
  control = lmerControl(optimizer = "bobyqa"),
  REML = FALSE
)

summary(m)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom * IsBilingual +  
##     length + log(freq_HAL) + semantic_relatedness + literality +  
##     (1 + IsExperimental + IsChineseIdiom | participant) + (1 |      end)
##    Data: df5
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    -4428    -4286     2234    -4468     8940 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9159 -0.6725 -0.0724  0.6181  3.7056 
## 
## Random effects:
##  Groups      Name            Variance  Std.Dev. Corr       
##  end         (Intercept)     0.0025510 0.05051             
##  participant (Intercept)     0.0116526 0.10795             
##              IsExperimental1 0.0008852 0.02975   0.28      
##              IsChineseIdiom0 0.0003493 0.01869  -0.25  0.10
##  Residual                    0.0327306 0.18092             
## Number of obs: 8960, groups:  end, 170; participant, 122
## 
## Fixed effects:
##                                                Estimate Std. Error
## (Intercept)                                   6.674e+00  5.043e-02
## IsExperimental1                              -2.958e-02  1.443e-02
## IsChineseIdiom0                               1.250e-02  1.374e-02
## IsBilingual1                                  1.302e-01  2.111e-02
## length                                        1.439e-02  3.840e-03
## log(freq_HAL)                                -2.136e-02  3.294e-03
## semantic_relatedness                         -3.631e-02  2.634e-02
## literality                                   -2.787e-02  5.822e-03
## IsExperimental1:IsChineseIdiom0              -7.285e-02  1.898e-02
## IsExperimental1:IsBilingual1                  8.673e-03  1.231e-02
## IsChineseIdiom0:IsBilingual1                 -1.668e-02  1.163e-02
## IsExperimental1:IsChineseIdiom0:IsBilingual1  7.793e-02  1.543e-02
##                                                      df t value Pr(>|t|)
## (Intercept)                                   1.929e+02 132.347  < 2e-16
## IsExperimental1                               2.240e+02  -2.049 0.041617
## IsChineseIdiom0                               2.235e+02   0.910 0.363607
## IsBilingual1                                  1.252e+02   6.168 8.81e-09
## length                                        1.614e+02   3.746 0.000250
## log(freq_HAL)                                 1.585e+02  -6.487 1.06e-09
## semantic_relatedness                          1.829e+02  -1.379 0.169698
## literality                                    1.770e+02  -4.787 3.56e-06
## IsExperimental1:IsChineseIdiom0               2.160e+02  -3.839 0.000162
## IsExperimental1:IsBilingual1                  3.283e+02   0.704 0.481645
## IsChineseIdiom0:IsBilingual1                  4.244e+02  -1.434 0.152270
## IsExperimental1:IsChineseIdiom0:IsBilingual1  8.532e+03   5.051 4.48e-07
##                                                 
## (Intercept)                                  ***
## IsExperimental1                              *  
## IsChineseIdiom0                                 
## IsBilingual1                                 ***
## length                                       ***
## log(freq_HAL)                                ***
## semantic_relatedness                            
## literality                                   ***
## IsExperimental1:IsChineseIdiom0              ***
## IsExperimental1:IsBilingual1                    
## IsChineseIdiom0:IsBilingual1                    
## IsExperimental1:IsChineseIdiom0:IsBilingual1 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 IsChI0 IsBln1 length l(_HAL smntc_ ltrlty
## IsExprmntl1 -0.016                                                 
## IsChinsIdm0 -0.244  0.455                                          
## IsBilingul1 -0.202  0.051  0.130                                   
## length      -0.624 -0.043  0.102  0.003                            
## lg(frq_HAL) -0.793 -0.012  0.070 -0.002  0.271                     
## smntc_rltdn -0.031  0.016  0.090  0.002  0.032 -0.089              
## literality  -0.449 -0.231  0.021  0.000  0.214  0.153 -0.169       
## IsExp1:ICI0  0.157 -0.646 -0.698 -0.074 -0.039 -0.025 -0.060 -0.084
## IsExpr1:IB1  0.029 -0.401 -0.179 -0.130 -0.006 -0.002  0.000 -0.001
## IsChnI0:IB1  0.062 -0.181 -0.407 -0.323 -0.005  0.004 -0.001  0.004
## IE1:ICI0:IB -0.038  0.257  0.280  0.193  0.006 -0.003  0.000 -0.002
##             IsE1:ICI0 IE1:IB ICI0:I
## IsExprmntl1                        
## IsChinsIdm0                        
## IsBilingul1                        
## length                             
## lg(frq_HAL)                        
## smntc_rltdn                        
## literality                         
## IsExp1:ICI0                        
## IsExpr1:IB1  0.245                 
## IsChnI0:IB1  0.269     0.450       
## IE1:ICI0:IB -0.385    -0.644 -0.690
# Reduced omnibus model: automatic backward elimination starting from `m`,
# then extract and summarise the model that step() settled on.
step_result <- lmerTest::step(m)
final_m <- lmerTest::get_model(step_result)


summary(final_m)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + IsChineseIdiom + IsBilingual +  
##     length + log(freq_HAL) + literality + (1 | end) + (IsExperimental |  
##     participant) + IsExperimental:IsChineseIdiom + IsExperimental:IsBilingual +  
##     IsChineseIdiom:IsBilingual + IsExperimental:IsChineseIdiom:IsBilingual
##    Data: df5
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -4431.6  -4318.0   2231.8  -4463.6     8944 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9028 -0.6724 -0.0691  0.6172  3.6916 
## 
## Random effects:
##  Groups      Name            Variance Std.Dev. Corr
##  end         (Intercept)     0.002529 0.05029      
##  participant (Intercept)     0.011218 0.10592      
##              IsExperimental1 0.000881 0.02968  0.29
##  Residual                    0.032832 0.18120      
## Number of obs: 8960, groups:  end, 170; participant, 122
## 
## Fixed effects:
##                                                Estimate Std. Error
## (Intercept)                                   6.671e+00  5.019e-02
## IsExperimental1                              -2.938e-02  1.439e-02
## IsChineseIdiom0                               1.446e-02  1.344e-02
## IsBilingual1                                  1.301e-01  2.077e-02
## length                                        1.457e-02  3.826e-03
## log(freq_HAL)                                -2.175e-02  3.270e-03
## literality                                   -2.919e-02  5.722e-03
## IsExperimental1:IsChineseIdiom0              -7.442e-02  1.889e-02
## IsExperimental1:IsBilingual1                  8.926e-03  1.231e-02
## IsChineseIdiom0:IsBilingual1                 -1.661e-02  1.113e-02
## IsExperimental1:IsChineseIdiom0:IsBilingual1  7.767e-02  1.545e-02
##                                                      df t value Pr(>|t|)
## (Intercept)                                   1.954e+02 132.917  < 2e-16
## IsExperimental1                               2.282e+02  -2.041 0.042364
## IsChineseIdiom0                               2.285e+02   1.076 0.282937
## IsBilingual1                                  1.370e+02   6.265 4.51e-09
## length                                        1.641e+02   3.809 0.000197
## log(freq_HAL)                                 1.616e+02  -6.651 4.27e-10
## literality                                    1.876e+02  -5.102 8.21e-07
## IsExperimental1:IsChineseIdiom0               2.205e+02  -3.939 0.000110
## IsExperimental1:IsBilingual1                  3.300e+02   0.725 0.469052
## IsChineseIdiom0:IsBilingual1                  8.609e+03  -1.492 0.135818
## IsExperimental1:IsChineseIdiom0:IsBilingual1  8.605e+03   5.028 5.06e-07
##                                                 
## (Intercept)                                  ***
## IsExperimental1                              *  
## IsChineseIdiom0                                 
## IsBilingual1                                 ***
## length                                       ***
## log(freq_HAL)                                ***
## literality                                   ***
## IsExperimental1:IsChineseIdiom0              ***
## IsExperimental1:IsBilingual1                    
## IsChineseIdiom0:IsBilingual1                    
## IsExperimental1:IsChineseIdiom0:IsBilingual1 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 IsChI0 IsBln1 length l(_HAL ltrlty IsE1:ICI0
## IsExprmntl1 -0.016                                                    
## IsChinsIdm0 -0.235  0.458                                             
## IsBilingul1 -0.199  0.051  0.106                                      
## length      -0.625 -0.044  0.100  0.003                               
## lg(frq_HAL) -0.800 -0.010  0.080 -0.002  0.275                        
## literality  -0.462 -0.232  0.038  0.000  0.223  0.141                 
## IsExp1:ICI0  0.156 -0.647 -0.708 -0.075 -0.037 -0.030 -0.096          
## IsExpr1:IB1  0.029 -0.402 -0.178 -0.131 -0.006 -0.002 -0.001  0.246   
## IsChnI0:IB1  0.051 -0.184 -0.398 -0.271 -0.005  0.004  0.004  0.283   
## IE1:ICI0:IB -0.038  0.258  0.287  0.196  0.006 -0.003 -0.002 -0.387   
##             IE1:IB ICI0:I
## IsExprmntl1              
## IsChinsIdm0              
## IsBilingul1              
## length                   
## lg(frq_HAL)              
## literality               
## IsExp1:ICI0              
## IsExpr1:IB1              
## IsChnI0:IB1  0.457       
## IE1:ICI0:IB -0.644 -0.721

There is a significant three-way interaction (phrase type × idiom language × group).

Get native English speaker model

# Full model for the IsBilingual == "0" subset only: phrase type x idiom
# language, plus length, EN_rating, log(freq_HAL), semantic_relatedness and
# literality. Same random-effects structure and fitting options as the
# omnibus model.
m_e <- lmer(
  log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom +
    length + EN_rating + log(freq_HAL) + semantic_relatedness + literality +
    (1 + IsExperimental + IsChineseIdiom | participant) + (1 | end),
  data = df5[df5$IsBilingual == "0", ],
  control = lmerControl(optimizer = "bobyqa"),
  REML = FALSE
)

summary(m_e)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom + length +  
##     EN_rating + log(freq_HAL) + semantic_relatedness + literality +  
##     (1 + IsExperimental + IsChineseIdiom | participant) + (1 |      end)
##    Data: df5[df5$IsBilingual == "0", ]
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2677.3  -2567.6   1355.7  -2711.3     4670 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.2931 -0.6424 -0.0489  0.6073  3.6620 
## 
## Random effects:
##  Groups      Name            Variance  Std.Dev. Corr       
##  end         (Intercept)     0.0024933 0.04993             
##  participant (Intercept)     0.0104809 0.10238             
##              IsExperimental1 0.0013086 0.03618   0.49      
##              IsChineseIdiom0 0.0006344 0.02519  -0.32  0.07
##  Residual                    0.0297639 0.17252             
## Number of obs: 4687, groups:  end, 170; participant, 63
## 
## Fixed effects:
##                                   Estimate Std. Error         df t value
## (Intercept)                       6.664030   0.053826 178.738651 123.808
## IsExperimental1                  -0.024858   0.014466 163.639745  -1.718
## IsChineseIdiom0                   0.108096   0.052237 157.576153   2.069
## length                            0.010737   0.004214 158.964277   2.548
## EN_rating                        -0.022358   0.011737 155.916628  -1.905
## log(freq_HAL)                    -0.014498   0.003535 155.725206  -4.101
## semantic_relatedness             -0.010441   0.028423 164.847234  -0.367
## literality                       -0.034564   0.006277 167.482958  -5.506
## IsExperimental1:IsChineseIdiom0  -0.071035   0.018602 153.249022  -3.819
##                                 Pr(>|t|)    
## (Intercept)                      < 2e-16 ***
## IsExperimental1                 0.087620 .  
## IsChineseIdiom0                 0.040147 *  
## length                          0.011778 *  
## EN_rating                       0.058628 .  
## log(freq_HAL)                   6.61e-05 ***
## semantic_relatedness            0.713838    
## literality                      1.36e-07 ***
## IsExperimental1:IsChineseIdiom0 0.000194 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 IsChI0 length EN_rtn l(_HAL smntc_ ltrlty
## IsExprmntl1  0.023                                                 
## IsChinsIdm0  0.048  0.108                                          
## length      -0.589 -0.048  0.249                                   
## EN_rating   -0.115  0.006 -0.965 -0.230                            
## lg(frq_HAL) -0.771 -0.014  0.147  0.298 -0.133                     
## smntc_rltdn -0.038  0.009  0.028  0.035 -0.003 -0.088              
## literality  -0.455 -0.248  0.044  0.227 -0.040  0.159 -0.148       
## IsExp1:ICI0  0.153 -0.626 -0.181 -0.041  0.001 -0.024 -0.067 -0.091
# Reduced model for the IsBilingual == "0" subset: backward elimination
# starting from `m_e`. Note that this overwrites the earlier `step_result`.
step_result <- lmerTest::step(m_e)
final_model <- lmerTest::get_model(step_result)

# Auto-print the compact representation of the selected model.
final_model
## Linear mixed model fit by maximum likelihood  ['lmerModLmerTest']
## Formula: log(exp_resp.rt) ~ IsExperimental + IsChineseIdiom + length +  
##     log(freq_HAL) + literality + (1 | end) + (IsExperimental |  
##     participant) + IsExperimental:IsChineseIdiom
##    Data: df5[df5$IsBilingual == "0", ]
##       AIC       BIC    logLik  deviance  df.resid 
## -2679.102 -2601.671  1351.551 -2703.102      4675 
## Random effects:
##  Groups      Name            Std.Dev. Corr
##  end         (Intercept)     0.05059      
##  participant (Intercept)     0.09894      
##              IsExperimental1 0.03598  0.51
##  Residual                    0.17301      
## Number of obs: 4687, groups:  end, 170; participant, 63
## Fixed Effects:
##                     (Intercept)                  IsExperimental1  
##                        6.651255                        -0.024785  
##                 IsChineseIdiom0                           length  
##                        0.012931                         0.008993  
##                   log(freq_HAL)                       literality  
##                       -0.015520                        -0.035409  
## IsExperimental1:IsChineseIdiom0  
##                       -0.071482
# Full summary (coefficient table with t-tests) of the reduced model
# selected by step() from `m_e`.
summary(final_model)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: log(exp_resp.rt) ~ IsExperimental + IsChineseIdiom + length +  
##     log(freq_HAL) + literality + (1 | end) + (IsExperimental |  
##     participant) + IsExperimental:IsChineseIdiom
##    Data: df5[df5$IsBilingual == "0", ]
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2679.1  -2601.7   1351.6  -2703.1     4675 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.2772 -0.6462 -0.0418  0.6061  3.6389 
## 
## Random effects:
##  Groups      Name            Variance Std.Dev. Corr
##  end         (Intercept)     0.002559 0.05059      
##  participant (Intercept)     0.009790 0.09894      
##              IsExperimental1 0.001295 0.03598  0.51
##  Residual                    0.029933 0.17301      
## Number of obs: 4687, groups:  end, 170; participant, 63
## 
## Fixed effects:
##                                   Estimate Std. Error         df t value
## (Intercept)                       6.651255   0.053813 181.119708 123.599
## IsExperimental1                  -0.024785   0.014584 165.909043  -1.699
## IsChineseIdiom0                   0.012931   0.013329 160.913824   0.970
## length                            0.008993   0.004138 161.412590   2.173
## log(freq_HAL)                    -0.015520   0.003524 157.912284  -4.404
## literality                       -0.035409   0.006262 173.562725  -5.655
## IsExperimental1:IsChineseIdiom0  -0.071482   0.018743 155.646622  -3.814
##                                 Pr(>|t|)    
## (Intercept)                      < 2e-16 ***
## IsExperimental1                 0.091104 .  
## IsChineseIdiom0                 0.333425    
## length                          0.031231 *  
## log(freq_HAL)                   1.95e-05 ***
## literality                      6.31e-08 ***
## IsExperimental1:IsChineseIdiom0 0.000197 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 IsChI0 length l(_HAL ltrlty
## IsExprmntl1  0.024                                   
## IsChinsIdm0 -0.230  0.446                            
## length      -0.638 -0.048  0.107                     
## lg(frq_HAL) -0.809 -0.012  0.083  0.281              
## literality  -0.475 -0.250  0.037  0.232  0.144       
## IsExp1:ICI0  0.152 -0.628 -0.707 -0.039 -0.030 -0.103

There is no significant main effect of phrase type or phrase language, but there is a significant interaction between them.

Look at Chinese phrases and English phrases separately

# Chinese phrases (IsChineseIdiom == "1") within the IsBilingual == "0"
# subset: effect of phrase type with literality, length, EN_rating and
# log(freq_HAL) as covariates; per-participant intercept and IsExperimental
# slope, plus an intercept per `end`.
m_ec <- lmer(
  log(exp_resp.rt) ~ IsExperimental + literality +
    length + EN_rating + log(freq_HAL) +
    (IsExperimental | participant) + (1 | end),
  data = subset(df5, df5$IsChineseIdiom == "1" & df5$IsBilingual == "0"),
  control = lmerControl(optimizer = "bobyqa"),
  REML = FALSE
)
summary(m_ec)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + literality + length + EN_rating +  
##     log(freq_HAL) + (IsExperimental | participant) + (1 | end)
##    Data: subset(df5, df5$IsChineseIdiom == "1" & df5$IsBilingual == "0")
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -1574.7  -1511.6    798.4  -1596.7     2293 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.5181 -0.6553 -0.0768  0.5951  3.1674 
## 
## Random effects:
##  Groups      Name            Variance  Std.Dev. Corr
##  end         (Intercept)     0.0020849 0.04566      
##  participant (Intercept)     0.0110869 0.10529      
##              IsExperimental1 0.0003273 0.01809  0.40
##  Residual                    0.0259327 0.16104      
## Number of obs: 2304, groups:  end, 82; participant, 63
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      6.643222   0.070273 79.495093  94.534  < 2e-16 ***
## IsExperimental1 -0.032935   0.013481 69.821193  -2.443 0.017097 *  
## literality      -0.021926   0.008942 77.880441  -2.452 0.016447 *  
## length           0.005402   0.005438 72.937557   0.993 0.323804    
## EN_rating        0.002169   0.016501 72.786692   0.131 0.895769    
## log(freq_HAL)   -0.016666   0.004439 72.560580  -3.755 0.000347 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 ltrlty length EN_rtn
## IsExprmntl1  0.103                            
## literality  -0.492 -0.391                     
## length      -0.506 -0.037  0.129              
## EN_rating   -0.334 -0.084  0.210 -0.223       
## lg(frq_HAL) -0.768 -0.002  0.087  0.238  0.039

Phrase type is now significant; frequency and literality are also significant.

# English phrases (IsChineseIdiom == "0") within the IsBilingual == "0"
# subset; same fixed- and random-effects specification as the model for the
# Chinese phrases.
m_ee <- lmer(
  log(exp_resp.rt) ~ IsExperimental + literality +
    length + EN_rating + log(freq_HAL) +
    (IsExperimental | participant) + (1 | end),
  data = subset(df5, df5$IsChineseIdiom == "0" & df5$IsBilingual == "0"),
  control = lmerControl(optimizer = "bobyqa"),
  REML = FALSE
)
summary(m_ee)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + literality + length + EN_rating +  
##     log(freq_HAL) + (IsExperimental | participant) + (1 | end)
##    Data: subset(df5, df5$IsChineseIdiom == "0" & df5$IsBilingual == "0")
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -1037.5   -974.0    529.8  -1059.5     2372 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7769 -0.6535 -0.0501  0.6372  3.4075 
## 
## Random effects:
##  Groups      Name            Variance Std.Dev. Corr
##  end         (Intercept)     0.002325 0.04822      
##  participant (Intercept)     0.008710 0.09333      
##              IsExperimental1 0.003797 0.06162  0.32
##  Residual                    0.033207 0.18223      
## Number of obs: 2383, groups:  end, 88; participant, 63
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      6.802166   0.098005 82.703538  69.406  < 2e-16 ***
## IsExperimental1 -0.091286   0.016785 97.600697  -5.439 3.97e-07 ***
## literality      -0.038177   0.008566 88.389842  -4.457 2.43e-05 ***
## length           0.013936   0.006070 82.012900   2.296   0.0242 *  
## EN_rating       -0.036472   0.016703 79.431723  -2.184   0.0319 *  
## log(freq_HAL)   -0.010435   0.005341 79.628824  -1.954   0.0542 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 ltrlty length EN_rtn
## IsExprmntl1  0.068                            
## literality  -0.210 -0.449                     
## length      -0.288 -0.153  0.320              
## EN_rating   -0.657  0.105 -0.257 -0.280       
## lg(frq_HAL) -0.396 -0.114  0.275  0.366 -0.318

Phrase type is still significant. Familiarity rating, literality, and word length are also significant. Frequency is only marginally significant (p = .054), suggesting that native English participants’ RTs to English phrases were only weakly affected by word frequency.

Get bilingual participants model

# Full model for the IsBilingual == "1" subset only: phrase type x idiom
# language, plus length, CN_rating, literality, semantic_relatedness,
# log(freq_HAL) and CAN_year. Random effects: per-participant intercept with
# slopes for IsExperimental and IsChineseIdiom, and an intercept per `end`.
m_c <- lmer(
  log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom + length + CN_rating +
    literality +
    semantic_relatedness + log(freq_HAL) + CAN_year +
    (IsExperimental + IsChineseIdiom | participant) + (1 | end),
  data = df5[df5$IsBilingual == "1", ],
  control = lmerControl(optimizer = "bobyqa"),
  REML = FALSE
)

summary(m_c)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: log(exp_resp.rt) ~ IsExperimental * IsChineseIdiom + length +  
##     CN_rating + literality + semantic_relatedness + log(freq_HAL) +  
##     CAN_year + (IsExperimental + IsChineseIdiom | participant) +  
##     (1 | end)
##    Data: df5[df5$IsBilingual == "1", ]
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -1800.7  -1686.2    918.4  -1836.7     4255 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.7175 -0.6747 -0.1050  0.6127  3.5911 
## 
## Random effects:
##  Groups      Name            Variance  Std.Dev. Corr       
##  end         (Intercept)     0.0034385 0.05864             
##  participant (Intercept)     0.0117841 0.10855             
##              IsExperimental1 0.0003869 0.01967   0.05      
##              IsChineseIdiom0 0.0002289 0.01513   0.01 -0.08
##  Residual                    0.0345525 0.18588             
## Number of obs: 4273, groups:  end, 170; participant, 59
## 
## Fixed effects:
##                                   Estimate Std. Error         df t value
## (Intercept)                      6.883e+00  7.601e-02  2.339e+02  90.553
## IsExperimental1                 -2.832e-02  1.627e-02  1.574e+02  -1.741
## IsChineseIdiom0                 -4.853e-03  1.970e-02  1.732e+02  -0.246
## length                           2.192e-02  4.759e-03  1.649e+02   4.605
## CN_rating                       -6.279e-04  6.338e-03  1.990e+02  -0.099
## literality                      -1.967e-02  7.238e-03  1.727e+02  -2.717
## semantic_relatedness            -4.718e-02  3.279e-02  1.740e+02  -1.439
## log(freq_HAL)                   -2.788e-02  4.086e-03  1.622e+02  -6.822
## CAN_year                        -1.255e-02  6.717e-03  5.882e+01  -1.868
## IsExperimental1:IsChineseIdiom0  6.594e-03  2.167e-02  1.590e+02   0.304
##                                 Pr(>|t|)    
## (Intercept)                      < 2e-16 ***
## IsExperimental1                  0.08371 .  
## IsChineseIdiom0                  0.80566    
## length                          8.21e-06 ***
## CN_rating                        0.92118    
## literality                       0.00725 ** 
## semantic_relatedness             0.15208    
## log(freq_HAL)                   1.69e-10 ***
## CAN_year                         0.06671 .  
## IsExperimental1:IsChineseIdiom0  0.76126    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 IsChI0 length CN_rtn ltrlty smntc_ l(_HAL CAN_yr
## IsExprmntl1 -0.010                                                        
## IsChinsIdm0 -0.376  0.363                                                 
## length      -0.496 -0.056  0.062                                          
## CN_rating   -0.362  0.005  0.617 -0.031                                   
## literality  -0.372 -0.259  0.023  0.211  0.001                            
## smntc_rltdn -0.054  0.011  0.111  0.036  0.056 -0.152                     
## lg(frq_HAL) -0.664 -0.018  0.082  0.261  0.030  0.158 -0.084              
## CAN_year    -0.459  0.005  0.002  0.002  0.000 -0.005  0.004  0.002       
## IsExp1:ICI0  0.135 -0.655 -0.563 -0.032 -0.020 -0.095 -0.067 -0.029 -0.003
# Reduced model for the IsBilingual == "1" subset: backward elimination
# starting from `m_c`, then extract the selected model.
step_resultc <- lmerTest::step(m_c)
final_model_c <- lmerTest::get_model(step_resultc)

# final_model_c   (compact print left disabled)

summary(final_model_c)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + length + literality + log(freq_HAL) +  
##     (1 | end) + (1 | participant)
##    Data: df5[df5$IsBilingual == "1", ]
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##  -1813.8  -1762.9    914.9  -1829.8     4265 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8145 -0.6780 -0.1049  0.6126  3.5662 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  end         (Intercept) 0.003436 0.05862 
##  participant (Intercept) 0.012783 0.11306 
##  Residual                0.034726 0.18635 
## Number of obs: 4273, groups:  end, 170; participant, 59
## 
## Fixed effects:
##                   Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)       6.810274   0.059998 189.958830 113.509  < 2e-16 ***
## IsExperimental1  -0.026235   0.012018 164.367855  -2.183  0.03045 *  
## length            0.022136   0.004721 166.853395   4.689 5.69e-06 ***
## literality       -0.021056   0.007107 179.100732  -2.963  0.00346 ** 
## log(freq_HAL)    -0.028345   0.004050 164.419871  -6.999 6.23e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 length ltrlty
## IsExprmntl1  0.129                     
## length      -0.619 -0.105              
## literality  -0.479 -0.449  0.224       
## lg(frq_HAL) -0.814 -0.055  0.257  0.149

There is a main effect of phrase type and no significant interaction effect.

Look at Chinese phrases and English phrases separately

# Chinese phrases (IsChineseIdiom == "1") within the IsBilingual == "1"
# subset: phrase type with literality, length and log(freq_HAL) as
# covariates; random intercepts for participant and `end`.
m_cc <- lmer(
  log(exp_resp.rt) ~ IsExperimental + literality +
    length + log(freq_HAL) +
    (1 | participant) + (1 | end),
  data = subset(df5, df5$IsChineseIdiom == "1" & df5$IsBilingual == "1"),
  control = lmerControl(optimizer = "bobyqa"),
  REML = FALSE
)
summary(m_cc)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + literality + length + log(freq_HAL) +  
##     (1 | participant) + (1 | end)
##    Data: subset(df5, df5$IsChineseIdiom == "1" & df5$IsBilingual == "1")
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   -850.8   -805.8    433.4   -866.8     2052 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5256 -0.6896 -0.0888  0.6108  3.5743 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  end         (Intercept) 0.00355  0.05958 
##  participant (Intercept) 0.01287  0.11344 
##  Residual                0.03392  0.18417 
## Number of obs: 2060, groups:  end, 82; participant, 59
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      6.744956   0.084201 85.557403  80.105  < 2e-16 ***
## IsExperimental1 -0.036412   0.017008 79.670097  -2.141  0.03535 *  
## literality      -0.006694   0.011123 85.604300  -0.602  0.54887    
## length           0.021793   0.006804 80.389343   3.203  0.00195 ** 
## log(freq_HAL)   -0.025221   0.005708 79.082626  -4.419 3.12e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 ltrlty length
## IsExprmntl1  0.076                     
## literality  -0.459 -0.391              
## length      -0.621 -0.068  0.176       
## lg(frq_HAL) -0.807 -0.006  0.085  0.237

Phrase type is significant; frequency and length are also significant.

# English phrases (IsChineseIdiom == "0") within the IsBilingual == "1"
# subset; same specification as the model for the Chinese phrases.
m_ce <- lmer(
  log(exp_resp.rt) ~ IsExperimental + literality +
    length + log(freq_HAL) +
    (1 | participant) + (1 | end),
  data = subset(df5, df5$IsChineseIdiom == "0" & df5$IsBilingual == "1"),
  control = lmerControl(optimizer = "bobyqa"),
  REML = FALSE
)
summary(m_ce)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## log(exp_resp.rt) ~ IsExperimental + literality + length + log(freq_HAL) +  
##     (1 | participant) + (1 | end)
##    Data: subset(df5, df5$IsChineseIdiom == "0" & df5$IsBilingual == "1")
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   -842.8   -797.2    429.4   -858.8     2205 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8511 -0.6636 -0.0927  0.5863  3.2372 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  end         (Intercept) 0.003246 0.05698 
##  participant (Intercept) 0.012944 0.11377 
##  Residual                0.035314 0.18792 
## Number of obs: 2213, groups:  end, 88; participant, 59
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      6.881275   0.083735 92.668491  82.179  < 2e-16 ***
## IsExperimental1 -0.012823   0.016944 83.230434  -0.757 0.451307    
## literality      -0.032559   0.009320 91.688111  -3.493 0.000736 ***
## length           0.021850   0.006597 83.931689   3.312 0.001367 ** 
## log(freq_HAL)   -0.032659   0.005780 83.182472  -5.650 2.19e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) IsExp1 ltrlty length
## IsExprmntl1  0.188                     
## literality  -0.510 -0.503              
## length      -0.644 -0.141  0.259       
## lg(frq_HAL) -0.846 -0.109  0.205  0.297

Phrase type is no longer significant; frequency, literality, and length are significant.