August 04, 2014
Cross-linguistic analyses
(A) Complexity Norms
(B) Correlation between all lengths
(C) Correlation between all lengths, controlling for frequency, open class only
(D) Correlation between all lengths and complexity, controlling for frequency
(E) Translation check data
Novel real objects
(A) Norms
(B) Mapping task (adults) TO DO
(C) Mapping task (children) TO DO
(D) Production task (labels + descriptions) TO DO
Geons
(A) Norms
(B) Mapping task TO DO
TO DO: figure out how to clear before starting a new experiment; save to GitHub; clean up so that only plots and critical statistical results are shown; check that all experiments remove duplicates.
# Run-time switches for this analysis script.
processNorms <- TRUE      # process norms (TRUE) or load norms?
removeRepeatSubj <- TRUE  # remove repeat subjects?
savePlots <- FALSE        # save plots to pdf?
# Read the cross-linguistic word table and attach the English complexity norms
# (matching xling$ENGLISH against englishComplexityNorms$word).
xling <- read.csv("data/xling_csv.csv")
xling <- merge(
  xling,
  englishComplexityNorms,
  by.x = "ENGLISH",
  by.y = "word"
)

# Drop the bad item (peso)
xling <- xling[xling$ENGLISH != "peso", ]

# Count how many words fall into each Open_class level and plot the counts.
xling$Open_class <- as.factor(xling$Open_class)
counts <- as.data.frame(summary(xling$Open_class))
names(counts) <- "freq"
# Labels are assigned positionally to the factor levels.
counts$class <- c("closed class", "open class bare", "open class inflected")

ggplot(data = counts, mapping = aes(x = class, y = freq, fill = class)) +
  geom_bar(stat = "identity") +
  ggtitle("Word types in corpus")
# Column indices of the length measures (every column whose name contains "LEN")
lens <- grep("LEN", names(xling))
# Blue-white-red palette generator used by the correlation plots
col1 <- colorRampPalette(c("blue", "white", "red"))

## Correlations between all lengths, all words
xling_len <- xling[, lens]
# Rename each column to the lower-cased text before its first underscore
names(xling_len) <- tolower(vapply(
  str_split(names(xling_len), "_"),
  function(parts) parts[1],
  character(1)
))

# Pairwise-complete Pearson correlations between all length columns
cmat <- cor(xling_len, use = "pairwise.complete.obs")
corrplot(
  cmat,
  method = "color",
  order = "FPC",
  col = col1(100),
  tl.cex = .5,
  tl.srt = 45,
  tl.col = "black"
)
# NOTE(review): this averages every cell of cmat, diagonal included.
mean(cmat)
## [1] 0.3213
## Correlations between all lengths, open class words only
# Keep the length columns for rows whose Open_class is not 0
xlingO <- xling[xling$Open_class != 0, lens]
# Rename each column to the lower-cased text before its first underscore
names(xlingO) <- tolower(vapply(
  str_split(names(xlingO), "_"),
  function(parts) parts[1],
  character(1)
))

# Pairwise-complete Pearson correlations between all lengths
cmat <- cor(xlingO, use = "pairwise.complete.obs")
corrplot(
  cmat,
  method = "color",
  order = "FPC",
  col = col1(100),
  tl.cex = .5,
  tl.srt = 45,
  tl.col = "black"
)
# NOTE(review): this averages every cell of cmat, diagonal included.
mean(cmat)
## [1] 0.2876
## all words
# Length columns first, then log frequency as the last column
xling_len_p <- xling[, c(lens, which(names(xling) == "log.e.freq"))]
# Rename each column to the lower-cased text before its first underscore
names(xling_len_p) <- tolower(vapply(
  str_split(names(xling_len_p), "_"),
  function(parts) parts[1],
  character(1)
))

# Partial correlations between all lengths, partialling out frequency (all words).
# The column positions are derived from `lens` instead of the former hard-coded
# 1:80 / 81, so the call keeps working if the number of length columns changes.
cmat.p <- partial.r(xling_len_p, seq_along(lens), length(lens) + 1)
# NOTE(review): this averages every cell of cmat.p, diagonal included.
mean(cmat.p)
## [1] 0.216
## open class words only
# Length columns first, then log frequency as the last column; rows whose
# Open_class is 0 are dropped.
xlingOF <- xling[
  xling$Open_class != 0,
  c(lens, which(names(xling) == "log.e.freq"))
]
# Rename each column to the lower-cased text before its first underscore
names(xlingOF) <- tolower(vapply(
  str_split(names(xlingOF), "_"),
  function(parts) parts[1],
  character(1)
))

# Partial correlations between all lengths, partialling out frequency
# (open class only). Column positions come from `lens` rather than the former
# hard-coded 1:80 / 81.
cmat.p <- partial.r(xlingOF, seq_along(lens), length(lens) + 1)

# sorted by first principal component
if (savePlots) {
  pdf("sort.pdf", height = 10, width = 10)
}
corrplot(
  cmat.p,
  method = "color",
  order = "FPC",
  col = col1(100),
  tl.cex = .5,
  tl.srt = 45,
  tl.col = "black"
)
if (savePlots) {
  dev.off()
}

# sorted by angular order of the eigenvectors
corrplot(
  cmat.p,
  method = "color",
  order = "AOE",
  col = col1(100),
  tl.cex = .5,
  tl.srt = 45,
  tl.col = "black"
)

# sorted by hierarchical clustering ("hclust" spelled out in full; the former
# "hclus" only worked through partial argument matching)
corrplot(
  cmat.p,
  method = "color",
  order = "hclust",
  col = col1(100),
  tl.cex = .5,
  tl.srt = 45,
  tl.col = "black"
)
# NOTE(review): this averages every cell of cmat.p, diagonal included.
mean(cmat.p)
## [1] 0.2201
# Copies of the correlation and its CI bounds that are zeroed out for every
# language whose Checked value is not "yes" (used by the p1 plot).
c_l[c("checked_only", "uci", "lci")] <- lapply(
  c_l[c("corr", "upper.ci", "lower.ci")],
  function(values) ifelse(c_l$Checked == "yes", values, 0)
)
### Plot p0: one zero-height bar per language.
### Error bars, partial-r points and the mean line are not drawn in this panel.
if (savePlots) {
  pdf("figure/p0.pdf", width = 10, height = 6)
}
ggplot(data = c_l, mapping = aes(x = language, y = 0, fill = Checked)) +
  geom_bar(stat = "identity") +
  labs(x = "Language", y = "Pearson's r") +
  scale_y_continuous(limits = c(-.07, .7)) +
  scale_fill_manual(values = c("pink", "red")) +
  # rotate the language labels so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(
    axis.title.x = element_text(size = 25),
    axis.text.x = element_text(size = 10),
    axis.title.y = element_text(size = 25),
    axis.text.y = element_text(size = 10)
  ) +
  theme(
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  theme(legend.position = "none")
if (savePlots) {
  dev.off()
}
### Plot p1: correlations and CIs shown only for languages with Checked == "yes"
### (checked_only / uci / lci are 0 for all other languages).
if (savePlots) {
  pdf("figure/p1.pdf", width = 10, height = 6)
}
ggplot(
  data = c_l,
  mapping = aes(x = language, y = checked_only, fill = Checked)
) +
  geom_bar(stat = "identity") +
  geom_linerange(mapping = aes(ymin = lci, ymax = uci)) +
  labs(x = "Language", y = "Pearson's r") +
  scale_y_continuous(limits = c(-.07, .7)) +
  scale_fill_manual(values = c("pink", "red")) +
  # rotate the language labels so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(
    axis.title.x = element_text(size = 25),
    axis.text.x = element_text(size = 10),
    axis.title.y = element_text(size = 25),
    axis.text.y = element_text(size = 10)
  ) +
  theme(
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  theme(legend.position = "none")
if (savePlots) {
  dev.off()
}
### Plot p2: Pearson's r per language with its CI (upper.ci / lower.ci) and a
### dashed line at the mean correlation. The partial-correlation points are not
### drawn in this panel.
if (savePlots) {
  pdf("figure/p2.pdf", width = 10, height = 6)
}
ggplot(data = c_l, mapping = aes(x = language, y = corr, fill = Checked)) +
  geom_bar(stat = "identity") +
  geom_linerange(mapping = aes(ymin = lower.ci, ymax = upper.ci)) +
  # geom_hline takes `yintercept` (as used by the other hline calls in this
  # file); the former `y =` argument is not the line-position parameter.
  geom_hline(yintercept = mean(c_l$corr), lty = 2) +
  labs(x = "Language", y = "Pearson's r") +
  scale_y_continuous(limits = c(-.07, .7)) +
  scale_fill_manual(values = c("pink", "red")) +
  # rotate the language labels so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(
    axis.title.x = element_text(size = 25),
    axis.text.x = element_text(size = 10),
    axis.title.y = element_text(size = 25),
    axis.text.y = element_text(size = 10)
  ) +
  theme(
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  theme(legend.position = "none")
if (savePlots) {
  dev.off()
}
### Plot p3: Pearson's r per language with its CI, the p.corr value as a
### triangle, and a dashed line at the mean correlation.
if (savePlots) {
  pdf("figure/p3.pdf", width = 10, height = 6)
}
ggplot(data = c_l, mapping = aes(x = language, y = corr, fill = Checked)) +
  geom_bar(stat = "identity") +
  geom_linerange(mapping = aes(ymin = lower.ci, ymax = upper.ci)) +
  geom_point(
    data = c_l,
    mapping = aes(x = language, y = p.corr),
    size = 2,
    shape = 17
  ) +
  # geom_hline takes `yintercept` (as used by the other hline calls in this
  # file); the former `y =` argument is not the line-position parameter.
  geom_hline(yintercept = mean(c_l$corr), lty = 2) +
  labs(x = "Language", y = "Pearson's r") +
  scale_y_continuous(limits = c(-.07, .7)) +
  scale_fill_manual(values = c("pink", "red")) +
  # rotate the language labels so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(
    axis.title.x = element_text(size = 25),
    axis.text.x = element_text(size = 10),
    axis.title.y = element_text(size = 25),
    axis.text.y = element_text(size = 10)
  ) +
  theme(
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  theme(legend.position = "none")
if (savePlots) {
  dev.off()
}
# Sanity check on English: correlation between column 1 of xlingOC and
# complexity with log.e.freq partialled out, then the plain correlation.
partial.r(
  xlingOC,
  c(1, which(names(xlingOC) == "complexity")),
  which(names(xlingOC) == "log.e.freq")
)
## partial correlations
## english complexity
## english 1.00 0.55
## complexity 0.55 1.00
with(xlingOC, cor(english, complexity, use = "pairwise"))
## [1] 0.6328
# Mean of the per-language correlations
mean(c_l$corr)
## [1] 0.3089
# Translation-check data: keep the first 500 rows of the accuracy table.
# NOTE(review): absolute, machine-specific path, unlike the relative path used
# for the xling data — confirm where this file should live.
checksR <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/xling/translation_accuracy.csv"
)[1:500, ]

# Look up each checked word's Open_class in xling
index <- match(checksR$ENGLISH, xling$ENGLISH)
checksR$class <- xling$Open_class[index]

# Use the English word as the row name, then drop the column
row.names(checksR) <- checksR$ENGLISH
checksR$ENGLISH <- NULL
# Factor -> numeric via its labels
checksR$class <- as.numeric(as.character(checksR$class))

# Per-column accuracy over the first 12 columns: column sum / number of rows
accuracy <- colSums(checksR[1:12]) / nrow(checksR)
mean(accuracy)
## [1] 0.919
dfa <- as.data.frame(accuracy)
dfa$lang <- rownames(dfa)
## [1] 0.919
## [1] NA
## [1] NA
# Per-word mean bet on the long word, with ci.low / ci.high summaries.
# All three aggregates use the same grouping formula so their rows come back
# in the same order; previously the CI aggregates omitted `quintile`, which
# can order the rows differently from `ms` and misalign the CI columns.
ms <- aggregate(
  LongBet ~ engWord + complexity + log.e.freq + complexity_cil +
    complexity_cih + quintile,
  data = d, FUN = mean
)
ms$bet_cil <- aggregate(
  LongBet ~ engWord + complexity + log.e.freq + complexity_cil +
    complexity_cih + quintile,
  data = d, FUN = ci.low
)$LongBet
ms$bet_cih <- aggregate(
  LongBet ~ engWord + complexity + log.e.freq + complexity_cil +
    complexity_cih + quintile,
  data = d, FUN = ci.high
)$LongBet

# The x variable is `complexity`: `ms` has no `norms.lf` column, which is why
# cor() received NULL and failed.
# NOTE(review): bet_cil / bet_cih are drawn as absolute bounds here, whereas
# other plots in this file add/subtract ci.high / ci.low results from the mean —
# confirm which convention the helpers follow.
# NOTE(review): `q` is not defined in this part of the file — presumably the
# quintile cut points; confirm.
ggplot(ms, aes(complexity, LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbarh(
    aes(xmin = complexity_cil, xmax = complexity_cih),
    size = 0.2, colour = "grey"
  ) +
  geom_errorbar(aes(ymin = bet_cil, ymax = bet_cih), size = 0.2, colour = "grey") +
  annotate(
    "text", x = 6, y = 25,
    label = paste("r=", round(cor(ms$complexity, ms$LongBet, use = "complete.obs"), 2))
  ) +
  xlab("Complexity Norms") +
  ylab("Bet to Long Word") +
  geom_vline(xintercept = q, col = "red") +
  ggtitle("High Frequency meanings (words)")
# NOTE(review): an earlier knit of this chunk stopped with
# "Error: 'x' must be numeric" because of the missing norms.lf column.
# Pearson correlation between bets to the long word and complexity norms,
# computed over all rows of d
cor.test(x = d$LongBet, y = d$complexity)
##
## Pearson's product-moment correlation
##
## data: d$LongBet and d$complexity
## t = 3.348, df = 1998, p-value = 0.0008281
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03097 0.11814
## sample estimates:
## cor
## 0.0747
# bm.partial is defined outside this section — presumably the correlation of
# the first two arguments with the third partialled out; confirm.
bm.partial(d$LongBet, d$complexity, d$log.e.freq)
## [1] 0.069
# partial.r(d[, c(4, 8, 10)], c(1, 2), 3)
# Same correlation between norms and bets, on the per-word aggregate `ms`
cor.test(x = ms$LongBet, y = ms$complexity)
##
## Pearson's product-moment correlation
##
## data: ms$LongBet and ms$complexity
## t = 2.227, df = 94, p-value = 0.02832
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02449 0.40614
## sample estimates:
## cor
## 0.2239
# Per-word version of the bm.partial call (same argument order as above)
bm.partial(ms$LongBet, ms$complexity, ms$log.e.freq)
## [1] 0.2283
# Mixed model: complexity and log frequency as fixed effects, random
# intercepts for trial and worker
summary(
  lmer(
    LongBet ~ complexity + log.e.freq + (1 | trial) + (1 | workerid),
    data = d
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: LongBet ~ complexity + log.e.freq + (1 | trial) + (1 | workerid)
## Data: d
##
## REML criterion at convergence: 17989
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2752 -0.7294 0.0043 0.7492 2.3246
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 15.19 3.90
## trial (Intercept) 1.51 1.23
## Residual 637.67 25.25
## Number of obs: 1931, groups: workerid, 200; trial, 10
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 56.215 2.872 19.58
## complexity 1.872 0.589 3.18
## log.e.freq -3.858 0.625 -6.17
##
## Correlation of Fixed Effects:
## (Intr) cmplxt
## complexity -0.688
## log.e.freq -0.709 0.047
# Mean bet on the long word per complexity-norm quintile (reassigns `ms`)
ms <- aggregate(LongBet ~ quintile, data = d, FUN = mean)
ms$bet_cil <- aggregate(LongBet ~ quintile, data = d, FUN = ci.low)$LongBet
ms$bet_cih <- aggregate(LongBet ~ quintile, data = d, FUN = ci.high)$LongBet

# NOTE(review): bet_cil / bet_cih are used as absolute bounds here — confirm
# that matches what ci.low / ci.high return.
ggplot(data = ms, mapping = aes(x = quintile, y = LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    mapping = aes(ymin = bet_cil, ymax = bet_cih),
    size = 0.2,
    colour = "black"
  ) +
  annotate(
    "text",
    x = 5,
    y = 25,
    label = paste("r=", round(cor(ms$quintile, ms$LongBet, use = "complete"), 2))
  ) +
  scale_y_continuous(limits = c(20, 80)) +
  # scale_x_continuous(limits = c(0, 7), breaks = 1:7, labels = 1:7) +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 15)
  ) +
  labs(x = "Complexity Norm quintile", y = "Bet to Long Word") +
  ggtitle("High Frequency meanings (words)")

# Correlation between norm quintiles and bets to the long word (all rows of d)
cor.test(x = d$quintile, y = d$LongBet)
##
## Pearson's product-moment correlation
##
## data: d$quintile and d$LongBet
## t = 3.939, df = 1998, p-value = 8.459e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.04412 0.13111
## sample estimates:
## cor
## 0.08779
# Same correlation on the quintile-level means in `ms`
cor.test(x = ms$quintile, y = ms$LongBet)
##
## Pearson's product-moment correlation
##
## data: ms$quintile and ms$LongBet
## t = 2.35, df = 3, p-value = 0.1003
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2667 0.9866
## sample estimates:
## cor
## 0.805
### Plot
# Mean bet on the long word per residual-complexity quintile
ms.qr <- aggregate(LongBet ~ resid.quintile, data = d, FUN = mean)
ms.qr$bet_cil <- aggregate(LongBet ~ resid.quintile, data = d, FUN = ci.low)$LongBet
ms.qr$bet_cih <- aggregate(LongBet ~ resid.quintile, data = d, FUN = ci.high)$LongBet

# NOTE(review): the r shown in the annotation is computed on all rows of d,
# not on the aggregated ms.qr — confirm this is intended.
ggplot(data = ms.qr, mapping = aes(x = resid.quintile, y = LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    mapping = aes(ymin = bet_cil, ymax = bet_cih),
    size = 0.2,
    colour = "black"
  ) +
  annotate(
    "text",
    x = 5,
    y = 25,
    label = paste("r=", round(cor(d$resid.quintile, d$LongBet, use = "complete"), 2))
  ) +
  scale_y_continuous(limits = c(20, 80)) +
  # scale_x_continuous(limits = c(0, 7), breaks = 1:7, labels = 1:7) +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 15)
  ) +
  labs(x = "RESIDUAL Complexity Norm quintile", y = "Bet to Long Word") +
  ggtitle("High Frequency meanings (words)")
## Warning: NaNs produced
# Correlation between residual quintile and bets over all rows of d
cor.test(x = d$resid.quintile, y = d$LongBet) ### highly correlated
##
## Pearson's product-moment correlation
##
## data: d$resid.quintile and d$LongBet
## t = 5.218, df = 1998, p-value = 1.998e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.07248 0.15897
## sample estimates:
## cor
## 0.1159
# Same correlation on the aggregated ms.qr table
cor.test(x = ms.qr$resid.quintile, y = ms.qr$LongBet)
## Error: not enough finite observations
# Correlation between the rating_1 and rating_2 columns of co_norms
with(co_norms, cor(rating_1, rating_2))
## [1] 0.8664
# Effect size against object ratio, one labelled point-range per objCondition
# NOTE(review): ymax is mapped to `cill` and ymin to `ciul` — the names suggest
# the bounds may be swapped (the drawn segment is the same either way); confirm.
ggplot(data = de, mapping = aes(x = objRatio, y = effect_size)) +
  geom_pointrange(mapping = aes(ymin = ciul, ymax = cill)) +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  # nudge the labels to the right of their points
  geom_text(
    mapping = aes(x = objRatio + .03, y = effect_size, label = objCondition)
  ) +
  labs(x = "object ratio", y = "effect size") +
  theme(
    text = element_text(size = 20),
    plot.title = element_text(size = 20)
  ) +
  ggtitle("Object ratio vs. effect size") +
  annotate(
    "text",
    x = .3,
    y = -.2,
    col = "red",
    label = paste("r=", round(cor(de$effect_size, de$objRatio, use = "complete"), 2))
  )
# Effect size against complexity ratio (c.Mratio), one labelled point-range per
# objCondition. The x-axis label now says "complexity ratio"; it was previously
# "object ratio", which did not match the plotted variable or the title.
# NOTE(review): ymax is mapped to `cill` and ymin to `ciul` — the names suggest
# the bounds may be swapped (the drawn segment is the same either way); confirm.
# NOTE(review): position = "dodge" triggers the recorded "ymax not defined"
# message — confirm whether dodging is needed here.
ggplot(de, aes(y = effect_size, x = c.Mratio)) +
  geom_pointrange(aes(ymax = cill, ymin = ciul), position = "dodge") +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  # nudge the labels to the right of their points
  geom_text(
    aes(c.Mratio + .02, effect_size, label = objCondition),
    position = "dodge"
  ) +
  ylab("effect size") +
  xlab("complexity ratio") +
  ggtitle("complexity ratio vs. effect size") +
  theme(text = element_text(size = 20), plot.title = element_text(size = 20)) +
  annotate(
    "text", x = .5, y = -.2, col = "red",
    label = paste("r=", round(cor(de$effect_size, de$c.Mratio, use = "complete"), 2))
  )
## ymax not defined: adjusting position using y instead
# Effect size against RT ratio (rt.Mratio), one labelled point-range per objCondition
# NOTE(review): ymax is mapped to `cill` and ymin to `ciul` — the names suggest
# the bounds may be swapped (the drawn segment is the same either way); confirm.
ggplot(data = de, mapping = aes(x = rt.Mratio, y = effect_size)) +
  geom_pointrange(mapping = aes(ymin = ciul, ymax = cill)) +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  # nudge the labels to the right of their points
  geom_text(
    mapping = aes(x = rt.Mratio + .0008, y = effect_size, label = objCondition)
  ) +
  labs(x = "RT ratio", y = "effect size") +
  ggtitle("RT ratio vs. effect size") +
  theme(
    text = element_text(size = 20),
    plot.title = element_text(size = 20)
  ) +
  annotate(
    "text",
    x = .985,
    y = -.2,
    col = "red",
    label = paste("r=", round(cor(de$effect_size, de$rt.Mratio, use = "complete"), 2))
  )
# Pearson correlation: object ratio vs. effect size
cor.test(x = de$objRatio, y = de$effect_size)
##
## Pearson's product-moment correlation
##
## data: de$objRatio and de$effect_size
## t = -3.051, df = 13, p-value = 0.009273
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8703 -0.1999
## sample estimates:
## cor
## -0.646
# Pearson correlation: complexity ratio (c.Mratio) vs. effect size
cor.test(x = de$c.Mratio, y = de$effect_size)
##
## Pearson's product-moment correlation
##
## data: de$c.Mratio and de$effect_size
## t = -3.601, df = 13, p-value = 0.003228
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8949 -0.3046
## sample estimates:
## cor
## -0.7066
# Pearson correlation: RT ratio (rt.Mratio) vs. effect size
cor.test(x = de$rt.Mratio, y = de$effect_size)
##
## Pearson's product-moment correlation
##
## data: de$rt.Mratio and de$effect_size
## t = -3.722, df = 13, p-value = 0.002561
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8995 -0.3259
## sample estimates:
## cor
## -0.7182
# Paired t-test on log.length: rows with condition "complex" vs. "simple"
# (the condition values include literal double quotes).
# NOTE(review): pairing relies on the two subsets being in matching row order — confirm.
t.test(
  x = md[md$condition == '"complex"', "log.length"],
  y = md[md$condition == '"simple"', "log.length"],
  paired = TRUE
)
##
## Paired t-test
##
## data: md[md$condition == "\"complex\"", "log.length"] and md[md$condition == "\"simple\"", "log.length"]
## t = 3.735, df = 286, p-value = 0.0002269
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.03846 0.12417
## sample estimates:
## mean of the differences
## 0.08131
# Mixed model of log.length on condition, with a random intercept and random
# trial slope per worker
summary(
  lmer(
    log.length ~ condition + (1 + trial | workerid),
    data = md
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length ~ condition + (1 + trial | workerid)
## Data: md
##
## REML criterion at convergence: -0.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.009 -0.626 0.061 0.633 2.773
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerid (Intercept) 1.53e-02 0.12358
## trial 3.40e-06 0.00184 1.00
## Residual 4.92e-02 0.22189
## Number of obs: 574, groups: workerid, 59
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.9350 0.0218 88.7
## condition"simple" -0.0797 0.0185 -4.3
##
## Correlation of Fixed Effects:
## (Intr)
## cndtn"smpl" -0.427
# Attach a complexity norm to every row of md by matching md$picture against
# co_norms$ratingNum.
index <- match(md$picture, co_norms$ratingNum)
# Assigning NULL to md$c.norms silently creates no column, which is what led to
# the "object 'c.norms' not found" errors recorded below. Fail here with an
# explicit message instead.
if (is.null(co_norms$value)) {
  stop(
    "co_norms has no 'value' column, so md$c.norms cannot be built",
    call. = FALSE
  )
}
md$c.norms <- co_norms$value[index]

# Per-picture mean log.length with ci.high / ci.low summaries
ms <- aggregate(log.length ~ c.norms + picture, data = md, FUN = mean)
## Error: object 'c.norms' not found
ms$cih <- aggregate(log.length ~ c.norms + picture, data = md, FUN = ci.high)$log.length
## Error: object 'c.norms' not found
ms$cil <- aggregate(log.length ~ c.norms + picture, data = md, FUN = ci.low)$log.length
## Error: object 'c.norms' not found
# plot (cih / cil are added to / subtracted from the mean)
ggplot(ms, aes(c.norms, log.length)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymax = log.length + cih, ymin = log.length - cil),
    size = 0.2, colour = "grey"
  ) +
  theme_bw() +
  xlab("Object Complexity Norms") +
  ylab("Log Word Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))
## Error: object 'c.norms' not found
# Attach an RT norm (log.rt) to every row of md by matching md$picture against
# rto_norms$Answer.train_image.
index <- match(md$picture, rto_norms$Answer.train_image)
md$rt.norms <- rto_norms$log.rt[index]

# Per-picture mean log.length with ci.high / ci.low summaries
ms <- aggregate(log.length ~ rt.norms + picture, data = md, FUN = mean)
ms$cih <- aggregate(log.length ~ rt.norms + picture, data = md, FUN = ci.high)$log.length
ms$cil <- aggregate(log.length ~ rt.norms + picture, data = md, FUN = ci.low)$log.length

# plot (cih / cil are added to / subtracted from the mean).
# The x-axis label now says "Object RT Norms"; it was previously
# "Object Complexity Norms" although the x variable is rt.norms.
ggplot(ms, aes(rt.norms, log.length)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymax = log.length + cih, ymin = log.length - cil),
    size = 0.2, colour = "grey"
  ) +
  theme_bw() +
  xlab("Object RT Norms") +
  ylab("Log Word Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))
# summary(lmer(length_c ~ condition + (1 | workerid), md))
# summary(lmer(length_c ~ condition + trial + (1 + trial | workerid), md))
# Mixed model of log.length_c on condition with a random intercept per worker.
# The predictor is written as `condition` (resolved through data = md), like
# the other models in this file, instead of `md$condition`. The fit is the
# same; only the coefficient label changes relative to the recorded output,
# which shows `md$condition`.
summary(lmer(log.length_c ~ condition + (1 | workerid), data = md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ md$condition + (1 | workerid)
## Data: md
##
## REML criterion at convergence: 863.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.004 -0.616 0.071 0.599 4.269
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 0.476 0.690
## Residual 0.175 0.418
## Number of obs: 600, groups: workerid, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.3545 0.0922 36.4
## md$condition"simple" -0.1128 0.0342 -3.3
##
## Correlation of Fixed Effects:
## (Intr)
## md$cndtn"s" -0.185
# Mixed model of log.length_c on condition and trial, with a random intercept
# and random trial slope per worker
summary(
  lmer(
    log.length_c ~ condition + trial + (1 + trial | workerid),
    data = md
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ condition + trial + (1 + trial | workerid)
## Data: md
##
## REML criterion at convergence: 823.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.185 -0.559 0.071 0.594 3.841
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerid (Intercept) 0.43168 0.6570
## trial 0.00177 0.0421 -0.02
## Residual 0.14949 0.3866
## Number of obs: 600, groups: workerid, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.54229 0.09310 38.0
## condition"simple" -0.12772 0.03260 -3.9
## trial -0.03278 0.00773 -4.2
##
## Correlation of Fixed Effects:
## (Intr) cndt""
## cndtn"smpl" -0.189
## trial -0.252 0.031
## Plot: density of log.length_c, one translucent fill per condition
ggplot(data = md, mapping = aes(x = log.length_c, fill = condition)) +
  geom_density(alpha = 0.2)
# Attach a complexity norm to every row of md by matching md$picture against
# co_norms$ratingNum.
index <- match(md$picture, co_norms$ratingNum)
# Assigning NULL to md$c.norms silently creates no column, which is what led to
# the "object 'c.norms' not found" errors recorded below. Fail here with an
# explicit message instead.
if (is.null(co_norms$value)) {
  stop(
    "co_norms has no 'value' column, so md$c.norms cannot be built",
    call. = FALSE
  )
}
md$c.norms <- co_norms$value[index]
# Complexity norm as predictor, random intercept and trial slope per worker
summary(lmer(log.length_c ~ c.norms + (1 + trial | workerid), data = md))
## Error: error in evaluating the argument 'object' in selecting a method for function 'summary': Error in eval(expr, envir, enclos) : object 'c.norms' not found
## Calls: lmer ... eval -> model.frame -> model.frame.default -> eval -> eval
# Complexity norm and trial as predictors, random intercept per worker
summary(lmer(log.length_c ~ c.norms + trial + (1 | workerid), data = md))
## Error: error in evaluating the argument 'object' in selecting a method for function 'summary': Error in eval(expr, envir, enclos) : object 'c.norms' not found
## Calls: lmer ... eval -> model.frame -> model.frame.default -> eval -> eval
# complexity norms predict length
cor.test(md$log.length_c, md$c.norms)
## Error: 'x' and 'y' must have the same length
# Per-picture mean log.length_c with ci.high / ci.low summaries and the number
# of workers (n.unique of workerid)
ms <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = mean)
## Error: object 'c.norms' not found
ms$cih <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = ci.high)$log.length_c
## Error: object 'c.norms' not found
ms$cil <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = ci.low)$log.length_c
## Error: object 'c.norms' not found
ms$n <- aggregate(workerid ~ c.norms + picture, data = md, FUN = n.unique)$workerid
## Error: object 'c.norms' not found
# Description length against complexity norm; cih / cil are added to /
# subtracted from the mean
ggplot(data = ms, mapping = aes(x = c.norms, y = log.length_c)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    mapping = aes(ymin = log.length_c - cil, ymax = log.length_c + cih),
    size = 0.2,
    colour = "grey"
  ) +
  theme_bw() +
  labs(
    x = "Object Complexity Norms",
    y = "Log Description Length (characters)"
  ) +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 15)
  )
## Error: object 'c.norms' not found
# Attach an RT norm (log.rt) to every row of md by matching md$picture against
# rto_norms$Answer.train_image.
index <- match(md$picture, rto_norms$Answer.train_image)
md$rt.norms <- rto_norms$log.rt[index]
# RT norm as predictor, random intercept and trial slope per worker
summary(
  lmer(
    log.length_c ~ rt.norms + (1 + trial | workerid),
    data = md
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ rt.norms + (1 + trial | workerid)
## Data: md
##
## REML criterion at convergence: 830.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.357 -0.566 0.076 0.560 3.756
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerid (Intercept) 0.43769 0.6616
## trial 0.00278 0.0527 -0.08
## Residual 0.15004 0.3873
## Number of obs: 600, groups: workerid, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.506 1.089 -0.46
## rt.norms 0.523 0.146 3.58
##
## Correlation of Fixed Effects:
## (Intr)
## rt.norms -0.997
# RT norm and trial as predictors, random intercept per worker
summary(
  lmer(
    log.length_c ~ rt.norms + trial + (1 | workerid),
    data = md
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ rt.norms + trial + (1 | workerid)
## Data: md
##
## REML criterion at convergence: 840.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.165 -0.601 0.069 0.618 3.872
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 0.481 0.694
## Residual 0.166 0.408
## Number of obs: 600, groups: workerid, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.0332 1.1091 -0.03
## rt.norms 0.4714 0.1485 3.17
## trial -0.0322 0.0058 -5.56
##
## Correlation of Fixed Effects:
## (Intr) rt.nrm
## rt.norms -0.996
## trial -0.008 -0.020
# Do RT norms predict description length? Pearson correlation over all rows of md
cor.test(x = md$log.length_c, y = md$rt.norms)
##
## Pearson's product-moment correlation
##
## data: md$log.length_c and md$rt.norms
## t = 0.2782, df = 598, p-value = 0.781
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06873 0.09134
## sample estimates:
## cor
## 0.01138
# Per-picture mean log.length_c with ci.low / ci.high summaries
ms <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = mean)
ms$cil <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = ci.low)$log.length_c
ms$cih <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = ci.high)$log.length_c

# Description length against RT norm; cih / cil are added to / subtracted from
# the mean
ggplot(data = ms, mapping = aes(x = rt.norms, y = log.length_c)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    mapping = aes(ymin = log.length_c - cil, ymax = log.length_c + cih),
    size = 0.2,
    colour = "grey"
  ) +
  theme_bw() +
  labs(x = "Object RT Norms", y = "Log Description Length (characters)") +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 15)
  )
# reliable when controlling for random effects