library(dplyr)
library(ggplot2)
library(Hmisc)
library(lme4)
library(afex)
library(knitr)
library(data.table)
library(zoo) #replace missing with col mean
df<-read.csv('revised_alldata.csv')
#convert numerical cols to factors
cols<-c('participant','prime','end','gender','idiom_lang','nat_LAN','phrase_condition','list')
df[cols]<-lapply(df[cols],factor)
colnames(df)[colnames(df)=="idiom_lang"] <- "Idiom_Language"
colnames(df)[colnames(df)=="phrase_condition"] <- "Phrase_Type"
levels(df$Phrase_Type)[levels(df$Phrase_Type)=="Experimental"] <- 'Idiomatic'
levels(df$nat_LAN)[levels(df$nat_LAN)=="2"] <- 'Mandarin-English Bilinguals'
levels(df$nat_LAN)[levels(df$nat_LAN)=="1"] <- 'Native English Monolinguals'
#replacing missing numerical values with col mean
cols1<-c('EN_rating','CN_rating','freq_HAL','age')
df[cols1] <- sapply(na.aggregate(df[cols1]),as.numeric)
head(df)
df3 <- df %>%
group_by(nat_LAN) %>%
filter(!(exp_resp.rt - median(exp_resp.rt)) > abs(2*sd(exp_resp.rt)))
#% of data points removed
1-nrow(df3)/nrow(df)
## [1] 0.0691818
ggplot(df3, aes(Idiom_Language, exp_resp.rt, color = Phrase_Type))+
geom_boxplot()+
ylab('Reaction time')+
facet_wrap(~nat_LAN)
#preserve df3 as the trimmed dataset with original variable levels
df4<-df3
levels(df4$nat_LAN)<-c(0,1)
colnames(df4)[colnames(df4)=="nat_LAN"] <- "IsBilingual"
levels(df4$Idiom_Language)<-c(1,0)
colnames(df4)[colnames(df4)=="Idiom_Language"] <- "IsChineseIdiom"
levels(df4$Phrase_Type)<-c(0,1)
colnames(df4)[colnames(df4)=="Phrase_Type"] <- "IsExperimental"
df5<-df4[!df4$end=="gold",]
df5<-df5[!df5$end=="water",]
Native language x phrase type x idiom language
null_m<-lmer(exp_resp.rt~1+(1|participant)+(1|end), df5, REML=FALSE)
summary(null_m)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula: exp_resp.rt ~ 1 + (1 | participant) + (1 | end)
## Data: df5
##
## AIC BIC logLik deviance df.resid
## 122151.5 122180.2 -61071.8 122143.5 9704
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.6333 -0.6813 -0.1397 0.5332 4.4332
##
## Random effects:
## Groups Name Variance Std.Dev.
## end (Intercept) 2642 51.40
## participant (Intercept) 8106 90.03
## Residual 15585 124.84
## Number of obs: 9708, groups: end, 185; participant, 122
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 678.292 9.081 169.432 74.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#ICC item
2645/(2645+15556)
## [1] 0.1453217
#ICC subject
8118/(8118+15556)
## [1] 0.3429078
About 34% of the overall variances in RT can be attributed to the variances between subjects, and 14% of variations in RT can be attributed to inter-item differences. Higher ICC indicate more similarity within cluster.
null_m<-lmer(exp_resp.rt~1+(1|participant)+(1|end), df5, REML=FALSE)
m1<-lmer(exp_resp.rt~IsExperimental+(1|participant)+(1|end), df5, REML=FALSE)
anova(null_m,m1)
m1<-lmer(exp_resp.rt~IsExperimental+(1|participant)+(1|end), df5, REML=FALSE)
m2<-lmer(exp_resp.rt~IsBilingual+IsExperimental+(1|participant)+(1|end), df5, REML=FALSE)
anova(m1,m2)
m2<-lmer(exp_resp.rt~IsBilingual+IsExperimental+(1|participant)+(1|end), df5, REML=FALSE)
m3<-lmer(exp_resp.rt~IsBilingual+IsExperimental+IsChineseIdiom+(1|participant)+(1|end), df5, REML=FALSE)
anova(m2,m3)
m3<-lmer(exp_resp.rt~IsBilingual+IsExperimental+IsChineseIdiom+(1|participant)+(1|end), df5, REML=FALSE)
m_inter<-lmer(exp_resp.rt~IsBilingual*IsExperimental*IsChineseIdiom+(1|participant)+(1|end), df5, REML=FALSE)
anova(m3, m_inter)
summary(m_inter)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula: exp_resp.rt ~ IsBilingual * IsExperimental * IsChineseIdiom +
## (1 | participant) + (1 | end)
## Data: df5
##
## AIC BIC logLik deviance df.resid
## 122049.2 122128.2 -61013.6 122027.2 9697
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.5912 -0.6807 -0.1471 0.5333 4.4695
##
## Random effects:
## Groups Name Variance Std.Dev.
## end (Intercept) 2180 46.69
## participant (Intercept) 5567 74.62
## Residual 15517 124.57
## Number of obs: 9708, groups: end, 185; participant, 122
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 649.796 12.142 248.587
## IsBilingual1 92.658 14.472 144.638
## IsExperimental1 -29.080 10.987 213.935
## IsChineseIdiom0 10.829 10.851 221.735
## IsBilingual1:IsExperimental1 -1.606 7.212 9413.997
## IsBilingual1:IsChineseIdiom0 -7.194 7.283 9419.087
## IsExperimental1:IsChineseIdiom0 -43.105 15.450 215.915
## IsBilingual1:IsExperimental1:IsChineseIdiom0 42.721 10.180 9418.393
## t value Pr(>|t|)
## (Intercept) 53.517 < 2e-16 ***
## IsBilingual1 6.403 2.00e-09 ***
## IsExperimental1 -2.647 0.00873 **
## IsChineseIdiom0 0.998 0.31936
## IsBilingual1:IsExperimental1 -0.223 0.82376
## IsBilingual1:IsChineseIdiom0 -0.988 0.32323
## IsExperimental1:IsChineseIdiom0 -2.790 0.00574 **
## IsBilingual1:IsExperimental1:IsChineseIdiom0 4.197 2.73e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) IsBln1 IsExp1 IsChI0 IsB1:IE1 IB1:IC IE1:IC
## IsBilingul1 -0.574
## IsExprmntl1 -0.441 0.078
## IsChinsIdm0 -0.447 0.079 0.494
## IsBlng1:IE1 0.141 -0.254 -0.307 -0.158
## IsBln1:ICI0 0.140 -0.251 -0.154 -0.322 0.503
## IsExp1:ICI0 0.314 -0.055 -0.711 -0.702 0.219 0.226
## IB1:IE1:ICI -0.100 0.180 0.218 0.230 -0.708 -0.715 -0.312
n_m_e<-lmer(exp_resp.rt~1+(1|participant)+(1|end), df5[df5$IsBilingual=='0',], REML=FALSE)
summary(n_m_e)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula: exp_resp.rt ~ 1 + (1 | participant) + (1 | end)
## Data: df5[df5$IsBilingual == "0", ]
##
## AIC BIC logLik deviance df.resid
## 62656.9 62683.1 -31324.5 62648.9 5080
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9164 -0.6671 -0.1227 0.5453 3.8928
##
## Random effects:
## Groups Name Variance Std.Dev.
## end (Intercept) 2407 49.06
## participant (Intercept) 4785 69.17
## Residual 11778 108.53
## Number of obs: 5084, groups: end, 185; participant, 63
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 629.655 9.565 80.162 65.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#by item
2413/(2413+11760)
## [1] 0.1702533
#by subejct
4757/(4757+11760)
## [1] 0.2880063
n_m_e<-lmer(exp_resp.rt~1+(1|participant)+(1|end), df5[df5$IsBilingual=='0',], REML=FALSE)
m1_e<-lmer(exp_resp.rt~IsExperimental+(1|participant)+(1|end), df5[df5$IsBilingual=='0',], REML=FALSE)
anova(n_m_e,m1_e)
m2_e<-lmer(exp_resp.rt~IsExperimental + IsChineseIdiom +(1|participant)+(1|end), df5[df5$IsBilingual=='0',], REML=FALSE)
anova(m2_e,m1_e)
summary(m2_e)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula:
## exp_resp.rt ~ IsExperimental + IsChineseIdiom + (1 | participant) +
## (1 | end)
## Data: df5[df5$IsBilingual == "0", ]
##
## AIC BIC logLik deviance df.resid
## 62611.1 62650.3 -31299.6 62599.1 5078
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9491 -0.6688 -0.1188 0.5400 3.9052
##
## Random effects:
## Groups Name Variance Std.Dev.
## end (Intercept) 1711 41.36
## participant (Intercept) 4778 69.13
## Residual 11782 108.54
## Number of obs: 5084, groups: end, 185; participant, 63
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 659.88 10.52 111.33 62.744 < 2e-16 ***
## IsExperimental1 -50.31 6.83 172.23 -7.366 6.97e-12 ***
## IsChineseIdiom0 -11.30 6.83 172.34 -1.654 0.0999 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) IsExp1
## IsExprmntl1 -0.316
## IsChinsIdm0 -0.324 -0.013
m_inter_e<-lmer(exp_resp.rt~IsExperimental * IsChineseIdiom +(1|participant)+(1|end), df5[df5$IsBilingual=='0',], REML=FALSE)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00482567
## (tol = 0.002, component 1)
anova(m_inter_e, m2_e)
summary(m_inter_e)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula:
## exp_resp.rt ~ IsExperimental * IsChineseIdiom + (1 | participant) +
## (1 | end)
## Data: df5[df5$IsBilingual == "0", ]
##
## AIC BIC logLik deviance df.resid
## 62603.6 62649.3 -31294.8 62589.6 5077
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9727 -0.6666 -0.1242 0.5525 3.9147
##
## Random effects:
## Groups Name Variance Std.Dev.
## end (Intercept) 1602 40.03
## participant (Intercept) 4777 69.12
## Residual 11781 108.54
## Number of obs: 5084, groups: end, 185; participant, 63
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 649.582 10.936 125.334 59.399
## IsExperimental1 -29.274 9.460 170.753 -3.094
## IsChineseIdiom0 9.218 9.356 176.896 0.985
## IsExperimental1:IsChineseIdiom0 -41.571 13.312 172.279 -3.123
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## IsExperimental1 0.00231 **
## IsChineseIdiom0 0.32588
## IsExperimental1:IsChineseIdiom0 0.00210 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) IsExp1 IsChI0
## IsExprmntl1 -0.422
## IsChinsIdm0 -0.427 0.493
## IsExp1:ICI0 0.300 -0.711 -0.703
## convergence code: 0
## Model failed to converge with max|grad| = 0.00482567 (tol = 0.002, component 1)
m_inter2_e<-lmer(exp_resp.rt~IsExperimental * IsChineseIdiom +
scale(freq_HAL)+
(1|participant)+(1|end),
df5[df5$IsBilingual=='0',], REML=FALSE)
anova(m_inter_e, m_inter2_e)
summary(m_inter2_e)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula:
## exp_resp.rt ~ IsExperimental * IsChineseIdiom + scale(freq_HAL) +
## (1 | participant) + (1 | end)
## Data: df5[df5$IsBilingual == "0", ]
##
## AIC BIC logLik deviance df.resid
## 62600.6 62652.9 -31292.3 62584.6 5076
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9391 -0.6661 -0.1236 0.5510 3.9189
##
## Random effects:
## Groups Name Variance Std.Dev.
## end (Intercept) 1549 39.36
## participant (Intercept) 4775 69.10
## Residual 11781 108.54
## Number of obs: 5084, groups: end, 185; participant, 63
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 649.624 10.882 123.765 59.695
## IsExperimental1 -29.105 9.335 170.949 -3.118
## IsChineseIdiom0 8.908 9.235 177.219 0.965
## scale(freq_HAL) -7.228 3.223 174.852 -2.243
## IsExperimental1:IsChineseIdiom0 -41.683 13.137 172.518 -3.173
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## IsExperimental1 0.00214 **
## IsChineseIdiom0 0.33606
## scale(freq_HAL) 0.02615 *
## IsExperimental1:IsChineseIdiom0 0.00179 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) IsExp1 IsChI0 s(_HAL
## IsExprmntl1 -0.418
## IsChinsIdm0 -0.423 0.493
## scl(fr_HAL) -0.003 -0.008 0.014
## IsExp1:ICI0 0.297 -0.711 -0.703 0.005
m_inter3_e<-lmer(exp_resp.rt~IsExperimental * IsChineseIdiom +
scale(freq_HAL)+ EN_rating+
(1|participant)+(1|end),
df5[df5$IsBilingual=='0',], REML=FALSE)
anova(m_inter2_e,m_inter3_e)
n_m_c<-lmer(exp_resp.rt~1+(1|participant)+(1|end), df5[df5$IsBilingual=='1',], REML=FALSE)
summary(n_m_c)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula: exp_resp.rt ~ 1 + (1 | participant) + (1 | end)
## Data: df5[df5$IsBilingual == "1", ]
##
## AIC BIC logLik deviance df.resid
## 59183.6 59209.3 -29587.8 59175.6 4620
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8624 -0.6791 -0.1689 0.5317 4.0958
##
## Random effects:
## Groups Name Variance Std.Dev.
## end (Intercept) 3876 62.25
## participant (Intercept) 6420 80.13
## Residual 18894 137.45
## Number of obs: 4624, groups: end, 185; participant, 59
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 730.38 11.59 80.75 63.04 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#by item
3874/(3874+18861)
## [1] 0.1703981
#by subject
6487/(6487+18861)
## [1] 0.2559176
n_m_c<-lmer(exp_resp.rt~1+(1|participant)+(1|end), df5[df5$IsBilingual=='1',], REML=FALSE)
m1_c<-lmer(exp_resp.rt~IsExperimental+(1|participant)+(1|end), df5[df5$IsBilingual=='1',], REML=FALSE)
anova(n_m_c,m1_c)
m2_c<-lmer(exp_resp.rt~IsExperimental + IsChineseIdiom +(1|participant)+(1|end), df5[df5$IsBilingual=='1',], REML=FALSE)
anova(m2_c,m1_c)
summary(m1_c)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula: exp_resp.rt ~ IsExperimental + (1 | participant) + (1 | end)
## Data: df5[df5$IsBilingual == "1", ]
##
## AIC BIC logLik deviance df.resid
## 59176.2 59208.4 -29583.1 59166.2 4619
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8847 -0.6809 -0.1675 0.5351 4.1117
##
## Random effects:
## Groups Name Variance Std.Dev.
## end (Intercept) 3612 60.10
## participant (Intercept) 6421 80.13
## Residual 18899 137.47
## Number of obs: 4624, groups: end, 185; participant, 59
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 745.131 12.480 104.203 59.708 < 2e-16 ***
## IsExperimental1 -30.446 9.783 167.652 -3.112 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## IsExprmntl1 -0.384
m_inter_c<-lmer(exp_resp.rt~IsExperimental * IsChineseIdiom +(1|participant)+(1|end), df5[df5$IsBilingual=='1',], REML=FALSE)
anova(m_inter_c, m2_c)
m1_1_c<-lmer(exp_resp.rt~IsExperimental + scale(freq_HAL) +
(1|participant)+(1|end), df5[df5$IsBilingual=='1',], REML=FALSE)
anova(m1_c, m1_1_c)
m1_2_c<-lmer(exp_resp.rt~IsExperimental + scale(freq_HAL) + CN_rating+
(1|participant)+(1|end), df5[df5$IsBilingual=='1',], REML=FALSE)
anova(m1_2_c,m1_1_c)
summary(m1_1_c)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula:
## exp_resp.rt ~ IsExperimental + scale(freq_HAL) + (1 | participant) +
## (1 | end)
## Data: df5[df5$IsBilingual == "1", ]
##
## AIC BIC logLik deviance df.resid
## 59164.5 59203.2 -29576.3 59152.5 4618
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8922 -0.6826 -0.1690 0.5328 4.1082
##
## Random effects:
## Groups Name Variance Std.Dev.
## end (Intercept) 3294 57.40
## participant (Intercept) 6418 80.11
## Residual 18899 137.47
## Number of obs: 4624, groups: end, 185; participant, 59
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 744.244 12.344 100.670 60.293 < 2e-16 ***
## IsExperimental1 -30.077 9.424 167.842 -3.191 0.001690 **
## scale(freq_HAL) -18.114 4.817 162.094 -3.761 0.000236 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) IsExp1
## IsExprmntl1 -0.374
## scl(fr_HAL) 0.016 -0.012
References: http://www.sfs.uni-tuebingen.de/~hbaayen/publications/baayenDavidsonBates.pdf
library(texreg)
## Version: 1.36.23
## Date: 2017-03-03
## Author: Philip Leifeld (University of Glasgow)
##
## Please cite the JSS article in your publications -- see citation("texreg").
htmlreg(list(null_m, m_inter),
file='texreg_all.doc',
single.row = T,
stars = numeric(0))
## The table was written to the file 'texreg_all.doc'.
htmlreg(list(n_m_e, m2_e, m_inter_e, m_inter2_e),
file='texreg_en.doc',
single.row = T,
stars = numeric(0))
## The table was written to the file 'texreg_en.doc'.
htmlreg(list(n_m_c, m1_c, m1_1_c),
file='texreg_ch.doc',
single.row = T,
stars = numeric(0))
## The table was written to the file 'texreg_ch.doc'.