Google cross-linguistic analyses
(A) Correlation between all lengths
(B) Correlation between all lengths, controlling for frequency, open class only
(C) Correlation between all lengths and complexity, controlling for frequency
(D) Translation check data
Novel real objects
(A) Norms
(B) Mapping task (adults)
(C) Mapping task (children) TO DO
(D) Production task (labels + descriptions)
Geons TO DO
(A) Norms
(B) Mapping task
# ---- Load corpora and norm tables ----
# Cross-linguistic word table (includes translation checks).
xling <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/xling/RC_xling_clean_WITH_checks.csv"
)
# LF complexity norms.
norms.lf <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/Experiment_26/Analysis/complexity_word_norms.csv"
)
# Brysbaert concreteness norms.
norms.b <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/brysbaert_database/brysbaert_corpus.csv",
  header = TRUE
)
# MRC psycholinguistic database.
norms.mrc <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/MRC_database/MRC_corpus.csv"
)
# SUBTLEX-US frequency table (whitespace-delimited, with a header row).
freqs <- read.table(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/MRC_database/SUBTLEXusDataBase.txt",
  header = TRUE
)
# ---- Attach norms to the cross-linguistic table ----
# Each lookup matches on the English word; words with no entry in a norm
# table receive NA (match() returns NA for them).

# LF complexity norms
index <- match(xling$ENGLISH, norms.lf$word)
xling$lf.complex <- norms.lf$complexity[index]

# Brysbaert concreteness
index <- match(xling$ENGLISH, norms.b$Word)
xling$b.conc <- norms.b$Conc.M[index]

# MRC measures (one lookup reused for all five columns)
index <- match(xling$ENGLISH, norms.mrc$word)
xling$mrc.fam  <- norms.mrc$mrc.fam[index]
xling$mrc.conc <- norms.mrc$mrc.conc[index]
xling$mrc.imag <- norms.mrc$mrc.imag[index]
xling$mrc.phon <- norms.mrc$mrc.phon[index]
xling$mrc.syl  <- norms.mrc$mrc.syl[index]

# SUBTLEX-US frequency (Lg10WF column)
index <- match(xling$ENGLISH, freqs$Word)
xling$log.E.freq <- freqs$Lg10WF[index]
# ---- Word types in the corpus ----
# Treat the word-class code as categorical and tabulate it.
xling$Open_class <- as.factor(xling$Open_class)
counts <- as.data.frame(summary(xling$Open_class))
# The labels are positional: this assumes exactly three factor levels, in
# this order.
counts[["class"]] <- c("closed class", "open class bare", "open class inflected")
colnames(counts) <- c("freq", "class")

ggplot(data = counts, mapping = aes(x = class, y = freq, fill = class)) +
  geom_bar(stat = "identity") +
  ggtitle("Word types in corpus")
# Indices of the word-length columns (those whose name contains "LEN").
lens <- grep("LEN", names(xling))
# Blue-white-red palette generator reused by every correlation plot below.
col1 <- colorRampPalette(c("blue", "white", "red"))

## Correlations between all lengths, all words
xling_len <- xling[, lens]
# Label each column with the lower-cased text before its first underscore.
names(xling_len) <- tolower(
  vapply(str_split(names(xling_len), "_"), function(parts) parts[1], character(1))
)

# Pairwise-complete Pearson correlations between all length columns
cmat <- cor(xling_len, use = "pairwise.complete.obs")
corrplot(
  cmat,
  method = "color", order = "FPC", col = col1(100),
  tl.cex = .5, tl.srt = 45, tl.col = "black"
)
# NOTE(review): mean() averages every cell of the matrix, including the
# diagonal of 1s and both triangles.
mean(cmat)
## [1] 0.3206
## Correlations between all lengths, open class words only
# Keep rows whose word-class code is not 0, and only the length columns.
xlingO <- xling[xling$Open_class != 0, lens]
names(xlingO) <- tolower(
  vapply(str_split(names(xlingO), "_"), function(parts) parts[1], character(1))
)

# Pairwise-complete Pearson correlations between all lengths
cmat <- cor(xlingO, use = "pairwise.complete.obs")
corrplot(
  cmat,
  method = "color", order = "FPC", col = col1(100),
  tl.cex = .5, tl.srt = 45, tl.col = "black"
)
# NOTE(review): this mean also includes the diagonal of 1s.
mean(cmat)
## [1] 0.2897
## Partial correlations between all lengths, controlling for frequency,
## open class words only
# Length columns first, log frequency appended as the last column.
xlingOF <- xling[xling$Open_class != 0, c(lens, which(names(xling) == "log.E.freq"))]
names(xlingOF) <- tolower(
  vapply(str_split(names(xlingOF), "_"), function(parts) parts[1], character(1))
)

# Correlate every length column while partialling out the final (frequency)
# column. Positions are derived from the data frame rather than hard-coded
# as 1:81 and 82, so the call stays correct if the number of length columns
# changes.
cmat.p <- partial.r(xlingOF, seq_len(ncol(xlingOF) - 1), ncol(xlingOF))

# sorted by first principal component
corrplot(
  cmat.p,
  method = "color", order = "FPC", col = col1(100),
  tl.cex = .5, tl.srt = 45, tl.col = "black"
)
# sorted by angular order of the eigenvectors
corrplot(
  cmat.p,
  method = "color", order = "AOE", col = col1(100),
  tl.cex = .5, tl.srt = 45, tl.col = "black"
)
# sorted by hierarchical clustering
corrplot(
  cmat.p,
  method = "color", order = "hclust", col = col1(100),
  tl.cex = .5, tl.srt = 45, tl.col = "black"
)
# NOTE(review): this mean includes the diagonal of 1s.
mean(cmat.p)
## [1] 0.2201
### Plot with bootstrapped CIs on Pearson's r, plus the p.corr values
# Bars: corr per language; line ranges: lower.ci to upper.ci;
# triangles: p.corr (presumably the frequency-partialled correlation;
# c_l is built elsewhere).
ggplot(c_l, aes(x = language, y = corr)) +
  geom_bar(stat = "identity", fill = "red") +
  geom_linerange(aes(ymin = lower.ci, ymax = upper.ci)) +
  geom_point(aes(y = p.corr), size = 2, shape = 17) +
  labs(
    x = "Language",
    y = "Pearson's r",
    title = "Correlation between word length and complexity norms"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Sanity check using English: correlate column 1 with lf.complex while
# partialling out log.e.freq
partial.r(
  xlingOC,
  c(1, which(names(xlingOC) == "lf.complex")),
  which(names(xlingOC) == "log.e.freq")
)
## partial correlations
## english lf.complex
## english 1.00 0.57
## lf.complex 0.57 1.00
# Zero-order correlation for comparison
cor(xlingOC$english, xlingOC$lf.complex, use = "pairwise.complete.obs")
## [1] 0.6387
# Mean correlation across languages
mean(c_l$corr)
## [1] 0.3089
# ---- Translation check data ----
# Only the first 500 rows of the accuracy file are used.
checksR <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/xling/translation_accuracy.csv"
)[1:500, ]

# Bring over each word's class code from the main table.
index <- match(checksR$ENGLISH, xling$ENGLISH)
checksR$class <- xling$Open_class[index]

# Use the English word as the row label, then drop it as a column.
row.names(checksR) <- checksR$ENGLISH
checksR$ENGLISH <- NULL

# Factor -> numeric code (via character, so labels rather than level
# indices are converted).
checksR$class <- as.numeric(as.character(checksR$class))

# Per-column accuracy over the first 12 columns: column total / row count.
accuracy <- colSums(checksR[1:12]) / nrow(checksR)
mean(accuracy)
## [1] 0.919
# Reshape the named accuracy vector into a two-column frame for plotting.
dfa <- as.data.frame(accuracy)
dfa$lang <- rownames(dfa)

# Bar chart of check accuracy by language
ggplot(dfa, aes(x = lang, y = accuracy, fill = lang)) +
  geom_bar(stat = "identity") +
  labs(x = "Language", title = "Google Translate Check Accuracy") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Accuracy restricted to rows with a non-zero class code
accuracy_open <- colSums(checksR[checksR$class != 0, 1:12]) /
  nrow(checksR[checksR$class != 0, ])
mean(accuracy_open)
## [1] 0.922
# Accuracy restricted to rows with class code 1
accuracy_open_bare <- colSums(checksR[checksR$class == 1, 1:12]) /
  nrow(checksR[checksR$class == 1, ])
mean(accuracy_open_bare)
## [1] 0.923
# ---- Word-level summary of bets on the long word ----
# All three aggregates use the SAME grouping formula. aggregate() orders
# its output by the grouping variables, so a different set of groupers
# (the original omitted `quintile` for n and bet_ci) can return rows in a
# different order and attach values to the wrong word.
ms <- aggregate(
  LongBet ~ engWord + norms.lf + l.freq + norms.lf_ci + quintile,
  data = d, FUN = mean
)
# Number of distinct workers per word. The response must be `workerid`:
# aggregating LongBet yields no `workerid` column, so `$workerid` was NULL
# and `ms$n` was silently never created.
# NOTE(review): assumes workerid and LongBet are missing on the same rows,
# so both aggregates see identical groups (a mismatch raises a length error).
ms$n <- aggregate(
  workerid ~ engWord + norms.lf + l.freq + norms.lf_ci + quintile,
  data = d, FUN = n.unique
)$workerid
# ci95 is defined elsewhere; its result is used below as a symmetric
# half-width around the mean.
ms$bet_ci <- aggregate(
  LongBet ~ engWord + norms.lf + l.freq + norms.lf_ci + quintile,
  data = d, FUN = ci95
)$LongBet

# Scatter of mean bet against complexity norm, with error bars on both axes.
# `q` comes from elsewhere in the file (presumably the quintile cut points).
ggplot(ms, aes(x = norms.lf, y = LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbarh(
    aes(xmin = norms.lf - norms.lf_ci, xmax = norms.lf + norms.lf_ci),
    size = 0.2, colour = "grey"
  ) +
  geom_errorbar(
    aes(ymin = LongBet - bet_ci, ymax = LongBet + bet_ci),
    size = 0.2, colour = "grey"
  ) +
  annotate(
    "text", x = 6, y = 25,
    label = paste("r=", round(cor(ms$norms.lf, ms$LongBet, use = "complete.obs"), 2))
  ) +
  xlab("Complexity Norms") +
  ylab("Bet to Long Word") +
  geom_vline(xintercept = q, col = "red") +
  ggtitle("High Frequency meanings (words)")
# Trial-level correlation between complexity norms and bets on the long word
cor.test(x = d$LongBet, y = d$norms.lf)
##
## Pearson's product-moment correlation
##
## data: d$LongBet and d$norms.lf
## t = 4.229, df = 1998, p-value = 2.459e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.05056 0.13744
## sample estimates:
## cor
## 0.09418
# Same relationship with l.freq passed as the third argument (bm.partial is
# defined elsewhere; presumably a partial correlation controlling for it).
bm.partial(d$LongBet, d$norms.lf, d$l.freq)
## [1] 0.0904
#partial.r(d[,c(4,8,10)],c(1,2),3 )
# Word-level correlation between norms and bets (per-word means in ms)
cor.test(x = ms$LongBet, y = ms$norms.lf)
##
## Pearson's product-moment correlation
##
## data: ms$LongBet and ms$norms.lf
## t = 2.998, df = 94, p-value = 0.003476
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1009 0.4682
## sample estimates:
## cor
## 0.2954
# Word-level version with l.freq as the third argument (bm.partial is
# defined elsewhere; presumably a partial correlation controlling for it).
bm.partial(ms$LongBet, ms$norms.lf, ms$l.freq)
## [1] 0.3163
# Mixed model: bet predicted by complexity norm and frequency, with random
# intercepts for trial and worker.
summary(
  lmer(LongBet ~ norms.lf + l.freq + (1 | trial) + (1 | workerid), data = d)
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: LongBet ~ norms.lf + l.freq + (1 | trial) + (1 | workerid)
## Data: d
##
## REML criterion at convergence: 17981
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2832 -0.7451 0.0176 0.7584 2.3186
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 15.4 3.93
## trial (Intercept) 1.6 1.26
## Residual 634.9 25.20
## Number of obs: 1931, groups: workerid, 200; trial, 10
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 53.188 3.037 17.51
## norms.lf 2.779 0.660 4.21
## l.freq -3.860 0.624 -6.19
##
## Correlation of Fixed Effects:
## (Intr) nrms.l
## norms.lf -0.728
## l.freq -0.665 0.035
# ---- Bets aggregated by complexity-norm quintile ----
ms <- aggregate(LongBet ~ quintile, data = d, FUN = mean)
# Distinct workers per quintile. The response must be `workerid`:
# aggregating LongBet yields no `workerid` column, so `$workerid` was NULL
# and `ms$n` was silently never created.
# NOTE(review): assumes workerid is never missing where LongBet is present,
# so both aggregates see the same quintiles.
ms$n <- aggregate(workerid ~ quintile, data = d, FUN = n.unique)$workerid
# ci95 is defined elsewhere; used below as a symmetric half-width.
ms$bet_ci <- aggregate(LongBet ~ quintile, data = d, FUN = ci95)$LongBet

ggplot(ms, aes(x = quintile, y = LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymin = LongBet - bet_ci, ymax = LongBet + bet_ci),
    size = 0.2, colour = "black"
  ) +
  annotate(
    "text", x = 5, y = 25,
    label = paste("r=", round(cor(ms$quintile, ms$LongBet, use = "complete.obs"), 2))
  ) +
  scale_y_continuous(limits = c(20, 80)) +
  #scale_x_continuous(limits = c(0, 7), breaks = 1:7, labels = 1:7) +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15)) +
  xlab("Complexity Norm quintile") +
  ylab("Bet to Long Word") +
  ggtitle("High Frequency meanings (words)")
# Trial-level correlation between norm quintile and bets on the long word
cor.test(x = d$quintile, y = d$LongBet)
##
## Pearson's product-moment correlation
##
## data: d$quintile and d$LongBet
## t = 3.808, df = 1998, p-value = 0.0001443
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.04121 0.12824
## sample estimates:
## cor
## 0.08489
# Correlation across the quintile means in ms (one row per quintile)
cor.test(x = ms$quintile, y = ms$LongBet)
##
## Pearson's product-moment correlation
##
## data: ms$quintile and ms$LongBet
## t = 2.315, df = 3, p-value = 0.1035
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2778 0.9862
## sample estimates:
## cor
## 0.8007
### Plot
# ---- Bets aggregated by RESIDUAL complexity-norm quintile ----
ms.qr <- aggregate(LongBet ~ resid.quintile, data = d, FUN = mean)
# Distinct workers per residual quintile. The response must be `workerid`:
# aggregating LongBet yields no `workerid` column, so `$workerid` was NULL
# and `ms.qr$n` was silently never created.
# NOTE(review): assumes workerid is never missing where LongBet is present,
# so both aggregates see the same quintiles.
ms.qr$n <- aggregate(workerid ~ resid.quintile, data = d, FUN = n.unique)$workerid
# ci95 is defined elsewhere; used below as a symmetric half-width.
ms.qr$bet_ci <- aggregate(LongBet ~ resid.quintile, data = d, FUN = ci95)$LongBet

ggplot(ms.qr, aes(x = resid.quintile, y = LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymin = LongBet - bet_ci, ymax = LongBet + bet_ci),
    size = 0.2, colour = "black"
  ) +
  # NOTE(review): this r is computed on the trial-level data `d`, while the
  # points and fitted line show the quintile means in ms.qr; confirm which
  # is intended.
  annotate(
    "text", x = 5, y = 25,
    label = paste("r=", round(cor(d$resid.quintile, d$LongBet, use = "complete.obs"), 2))
  ) +
  scale_y_continuous(limits = c(20, 80)) +
  #scale_x_continuous(limits = c(0, 7), breaks = 1:7, labels = 1:7) +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15)) +
  xlab("RESIDUAL Complexity Norm quintile") +
  ylab("Bet to Long Word") +
  ggtitle("High Frequency meanings (words)")
# Trial-level correlation between residual quintile and bets
cor.test(x = d$resid.quintile, y = d$LongBet) ### highly correlated
##
## Pearson's product-moment correlation
##
## data: d$resid.quintile and d$LongBet
## t = 3.96, df = 1910, p-value = 7.768e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.0456 0.1345
## sample estimates:
## cor
## 0.09024
# Correlation across the residual-quintile means (one row per quintile)
cor.test(x = ms.qr$resid.quintile, y = ms.qr$LongBet)
##
## Pearson's product-moment correlation
##
## data: ms.qr$resid.quintile and ms.qr$LongBet
## t = 2.512, df = 3, p-value = 0.08678
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2156 0.9879
## sample estimates:
## cor
## 0.8233
# ---- Novel real objects: norm tables ----
# Complexity ratings for the objects.
c_norms <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/Experiment_9_norm/complicated1AND2_norms.csv"
)
# RT norms for the objects.
rt_norms <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/Experiment_30/Analysis/rt_norms.csv"
)
# Effect size against object ratio, one labelled point per objCondition,
# with a dashed zero line and a linear fit.
ggplot(de, aes(x = objRatio, y = effect_size)) +
  geom_pointrange(aes(ymax = cill, ymin = ciul)) +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  # Labels are nudged right so they do not sit on top of the points.
  geom_text(aes(objRatio + .03, effect_size, label = objCondition)) +
  ylab("effect size") +
  xlab("object ratio") +
  theme(text = element_text(size = 20), plot.title = element_text(size = 20)) +
  ggtitle("Object ratio vs. effect size") +
  annotate(
    "text", x = .3, y = -.2, col = "red",
    label = paste("r=", round(cor(de$effect_size, de$objRatio, use = "complete.obs"), 2))
  )
# Effect size against the complexity ratio (c.Mratio), one labelled point
# per objCondition, with a dashed zero line and a linear fit.
ggplot(de, aes(x = c.Mratio, y = effect_size)) +
  geom_pointrange(aes(ymax = cill, ymin = ciul), position = "dodge") +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  # Labels are nudged right so they do not sit on top of the points.
  geom_text(aes(c.Mratio + .02, effect_size, label = objCondition), position = "dodge") +
  ylab("effect size") +
  # The x axis is c.Mratio; the label previously read "object ratio",
  # copied from the object-ratio plot.
  xlab("complexity ratio") +
  ggtitle("complexity ratio vs. effect size") +
  theme(text = element_text(size = 20), plot.title = element_text(size = 20)) +
  annotate(
    "text", x = .5, y = -.2, col = "red",
    label = paste("r=", round(cor(de$effect_size, de$c.Mratio, use = "complete.obs"), 2))
  )
## ymax not defined: adjusting position using y instead
# Effect size against the RT ratio (rt.Mratio), one labelled point per
# objCondition, with a dashed zero line and a linear fit.
ggplot(de, aes(x = rt.Mratio, y = effect_size)) +
  geom_pointrange(aes(ymax = cill, ymin = ciul)) +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  # Labels are nudged right so they do not sit on top of the points.
  geom_text(aes(rt.Mratio + .0008, effect_size, label = objCondition)) +
  ylab("effect size") +
  xlab("RT ratio") +
  ggtitle("RT ratio vs. effect size") +
  theme(text = element_text(size = 20), plot.title = element_text(size = 20)) +
  annotate(
    "text", x = .985, y = -.2, col = "red",
    label = paste("r=", round(cor(de$effect_size, de$rt.Mratio, use = "complete.obs"), 2))
  )
# Object ratio vs. effect size
cor.test(x = de$objRatio, y = de$effect_size)
##
## Pearson's product-moment correlation
##
## data: de$objRatio and de$effect_size
## t = -3.051, df = 13, p-value = 0.009273
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8703 -0.1999
## sample estimates:
## cor
## -0.646
# Complexity ratio vs. effect size
cor.test(x = de$c.Mratio, y = de$effect_size)
##
## Pearson's product-moment correlation
##
## data: de$c.Mratio and de$effect_size
## t = -3.494, df = 13, p-value = 0.00396
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8907 -0.2854
## sample estimates:
## cor
## -0.6959
# RT ratio vs. effect size
cor.test(x = de$rt.Mratio, y = de$effect_size)
##
## Pearson's product-moment correlation
##
## data: de$rt.Mratio and de$effect_size
## t = -3.659, df = 13, p-value = 0.002887
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8972 -0.3149
## sample estimates:
## cor
## -0.7123
# Paired t-test on log label length, complex vs. simple condition. The
# condition values carry literal double quotes, hence the '"complex"' form.
# NOTE(review): pairing is purely positional (i-th complex row with i-th
# simple row); confirm the rows of md are ordered to make that meaningful.
t.test(
  x = md[md$condition == '"complex"', "log.length"],
  y = md[md$condition == '"simple"', "log.length"],
  paired = TRUE
)
##
## Paired t-test
##
## data: md[md$condition == "\"complex\"", "log.length"] and md[md$condition == "\"simple\"", "log.length"]
## t = 3.735, df = 286, p-value = 0.0002269
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.03846 0.12417
## sample estimates:
## mean of the differences
## 0.08131
# Mixed model: log label length by condition, with by-worker random
# intercepts and trial slopes.
summary(
  lmer(log.length ~ condition + (1 + trial | workerid), data = md)
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length ~ condition + (1 + trial | workerid)
## Data: md
##
## REML criterion at convergence: -0.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.009 -0.626 0.061 0.633 2.773
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerid (Intercept) 1.53e-02 0.12358
## trial 3.40e-06 0.00184 1.00
## Residual 4.92e-02 0.22189
## Number of obs: 574, groups: workerid, 59
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.9350 0.0218 88.7
## condition"simple" -0.0797 0.0185 -4.3
##
## Correlation of Fixed Effects:
## (Intr)
## cndtn"smpl" -0.427
# ---- Label length vs. object complexity norms ----
# Look up each picture's complexity norm.
index <- match(md$picture, c_norms$ratingNum)
md$c.norms <- c_norms$value[index]

# Per-picture mean log label length, CI offsets and number of distinct
# workers. ci.high / ci.low are defined elsewhere; they are used below as
# distances above / below the mean.
ms <- aggregate(log.length ~ c.norms + picture, data = md, FUN = mean)
ms$cih <- aggregate(log.length ~ c.norms + picture, data = md, FUN = ci.high)$log.length
ms$cil <- aggregate(log.length ~ c.norms + picture, data = md, FUN = ci.low)$log.length
ms$n <- aggregate(workerid ~ c.norms + picture, data = md, FUN = n.unique)$workerid

# plot
ggplot(ms, aes(x = c.norms, y = log.length)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymax = log.length + cih, ymin = log.length - cil),
    size = 0.2, colour = "grey"
  ) +
  theme_bw() +
  xlab("Object Complexity Norms") +
  ylab("Log Word Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))
# ---- Label length vs. object RT norms ----
# Look up each picture's RT norm (log.rt).
index <- match(md$picture, rt_norms$Answer.train_image)
md$rt.norms <- rt_norms$log.rt[index]

# Per-picture mean log label length, CI offsets and number of distinct
# workers. ci.high / ci.low are defined elsewhere; they are used below as
# distances above / below the mean.
ms <- aggregate(log.length ~ rt.norms + picture, data = md, FUN = mean)
ms$cih <- aggregate(log.length ~ rt.norms + picture, data = md, FUN = ci.high)$log.length
ms$cil <- aggregate(log.length ~ rt.norms + picture, data = md, FUN = ci.low)$log.length
ms$n <- aggregate(workerid ~ rt.norms + picture, data = md, FUN = n.unique)$workerid

# plot
ggplot(ms, aes(x = rt.norms, y = log.length)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymax = log.length + cih, ymin = log.length - cil),
    size = 0.2, colour = "grey"
  ) +
  theme_bw() +
  # The x axis is rt.norms; the label previously read "Object Complexity
  # Norms", copied from the complexity-norm plot.
  xlab("Object RT Norms") +
  ylab("Log Word Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))
# Earlier models on the raw (non-log) length, kept for reference:
#summary(lmer(length_c~condition + (1|workerid), md))
#summary(lmer(length_c~condition + trial + (1+trial|workerid), md))
# Mixed model: log description length by condition, random intercept per
# worker. NOTE(review): the predictor is written as md$condition although
# data = md is supplied; the other models use the bare column name.
summary(
  lmer(log.length_c ~ md$condition + (1 | workerid), data = md)
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ md$condition + (1 | workerid)
## Data: md
##
## REML criterion at convergence: 863.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.004 -0.616 0.071 0.599 4.269
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 0.476 0.690
## Residual 0.175 0.418
## Number of obs: 600, groups: workerid, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.3545 0.0922 36.4
## md$condition"simple" -0.1128 0.0342 -3.3
##
## Correlation of Fixed Effects:
## (Intr)
## md$cndtn"s" -0.185
# Adds trial as a fixed effect, with by-worker random intercepts and
# trial slopes.
summary(
  lmer(log.length_c ~ condition + trial + (1 + trial | workerid), data = md)
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ condition + trial + (1 + trial | workerid)
## Data: md
##
## REML criterion at convergence: 823.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.185 -0.559 0.071 0.594 3.841
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerid (Intercept) 0.43168 0.6570
## trial 0.00177 0.0421 -0.02
## Residual 0.14949 0.3866
## Number of obs: 600, groups: workerid, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.54229 0.09310 38.0
## condition"simple" -0.12772 0.03260 -3.9
## trial -0.03278 0.00773 -4.2
##
## Correlation of Fixed Effects:
## (Intr) cndt""
## cndtn"smpl" -0.189
## trial -0.252 0.031
## plot
# Density of log description length, one translucent curve per condition
ggplot(md, aes(x = log.length_c, fill = condition)) +
  geom_density(alpha = 0.2)

# Look up each picture's complexity norm.
index <- match(md$picture, c_norms$ratingNum)
md$c.norms <- c_norms$value[index]

# Mixed model: log description length by complexity norm, with by-worker
# random intercepts and trial slopes.
summary(
  lmer(log.length_c ~ c.norms + (1 + trial | workerid), data = md)
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ c.norms + (1 + trial | workerid)
## Data: md
##
## REML criterion at convergence: 826.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.272 -0.551 0.069 0.587 3.893
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerid (Intercept) 0.44295 0.6655
## trial 0.00275 0.0524 -0.11
## Residual 0.14874 0.3857
## Number of obs: 600, groups: workerid, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.2392 0.0944 34.3
## c.norms 0.2921 0.0691 4.2
##
## Correlation of Fixed Effects:
## (Intr)
## c.norms -0.346
# Complexity norm plus trial as fixed effects, random intercept per worker.
summary(
  lmer(log.length_c ~ c.norms + trial + (1 | workerid), data = md)
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ c.norms + trial + (1 | workerid)
## Data: md
##
## REML criterion at convergence: 836
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.062 -0.625 0.060 0.597 4.042
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 0.477 0.690
## Residual 0.165 0.406
## Number of obs: 600, groups: workerid, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.34180 0.10143 32.9
## c.norms 0.28220 0.07006 4.0
## trial -0.03248 0.00577 -5.6
##
## Correlation of Fixed Effects:
## (Intr) c.nrms
## c.norms -0.322
## trial -0.304 -0.027
# Do complexity norms predict description length? (trial-level correlation)
cor.test(x = md$log.length_c, y = md$c.norms)
##
## Pearson's product-moment correlation
##
## data: md$log.length_c and md$c.norms
## t = 1.965, df = 598, p-value = 0.04993
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 3.457e-05 1.591e-01
## sample estimates:
## cor
## 0.08008
# Per-picture mean log description length, CI offsets and number of
# distinct workers. ci.high / ci.low are defined elsewhere; they are used
# below as distances above / below the mean.
ms <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = mean)
ms$cih <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = ci.high)$log.length_c
ms$cil <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = ci.low)$log.length_c
ms$n <- aggregate(workerid ~ c.norms + picture, data = md, FUN = n.unique)$workerid

ggplot(ms, aes(x = c.norms, y = log.length_c)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymax = log.length_c + cih, ymin = log.length_c - cil),
    size = 0.2, colour = "grey"
  ) +
  theme_bw() +
  xlab("Object Complexity Norms") +
  ylab("Log Description Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))
# Look up each picture's RT norm (log.rt).
index <- match(md$picture, rt_norms$Answer.train_image)
md$rt.norms <- rt_norms$log.rt[index]

# Mixed model: log description length by RT norm, with by-worker random
# intercepts and trial slopes.
summary(
  lmer(log.length_c ~ rt.norms + (1 + trial | workerid), data = md)
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ rt.norms + (1 + trial | workerid)
## Data: md
##
## REML criterion at convergence: 825.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.306 -0.589 0.083 0.590 3.705
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerid (Intercept) 0.43683 0.6609
## trial 0.00277 0.0527 -0.08
## Residual 0.14889 0.3859
## Number of obs: 600, groups: workerid, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -2.262 1.373 -1.65
## rt.norms 0.770 0.187 4.12
##
## Correlation of Fixed Effects:
## (Intr)
## rt.norms -0.998
# RT norm plus trial as fixed effects, random intercept per worker.
summary(
  lmer(log.length_c ~ rt.norms + trial + (1 | workerid), data = md)
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ rt.norms + trial + (1 | workerid)
## Data: md
##
## REML criterion at convergence: 836.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.119 -0.587 0.079 0.615 3.803
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 0.481 0.693
## Residual 0.165 0.406
## Number of obs: 600, groups: workerid, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -1.71467 1.39726 -1.23
## rt.norms 0.70745 0.19008 3.72
## trial -0.03215 0.00578 -5.57
##
## Correlation of Fixed Effects:
## (Intr) rt.nrm
## rt.norms -0.998
## trial -0.009 -0.014
# Do RT norms predict description length? (trial-level correlation)
cor.test(x = md$log.length_c, y = md$rt.norms)
##
## Pearson's product-moment correlation
##
## data: md$log.length_c and md$rt.norms
## t = 0.848, df = 598, p-value = 0.3968
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.04551 0.11438
## sample estimates:
## cor
## 0.03466
# Per-picture mean log description length, CI offsets and number of
# distinct workers. ci.high / ci.low are defined elsewhere; they are used
# below as distances above / below the mean.
ms <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = mean)
ms$cih <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = ci.high)$log.length_c
ms$cil <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = ci.low)$log.length_c
ms$n <- aggregate(workerid ~ rt.norms + picture, data = md, FUN = n.unique)$workerid

ggplot(ms, aes(x = rt.norms, y = log.length_c)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymax = log.length_c + cih, ymin = log.length_c - cil),
    size = 0.2, colour = "grey"
  ) +
  theme_bw() +
  xlab("Object RT Norms") +
  ylab("Log Description Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))
# reliable when controlling for random effects