Referential Complexity Analyses

M. Lewis

14 July 2014


Analyses:

  1. Google cross-linguistic analyses
    (A) Correlation between all lengths
    (B) Correlation between all lengths, controlling for frequency, open class only
    (C) Correlation between all lengths and complexity, controlling for frequency
    (D) Translation check data

  2. High frequency words in mapping task

  3. Novel real objects
    (A) Norms
    (B) Mapping task (adults)
    (C) Mapping task (children) TO DO
    (D) Production task (labels + descriptions)

  4. Geons TO DO
    (A) Norms
    (B) Mapping task


(1) Google cross-linguistic analyses (Complexity norms task)

read in data

# NOTE(review): every path starts at the filesystem root ("/Documents/...");
# confirm a leading "~" was not lost when this report was rendered.

# Cross-linguistic word list (with translation checks)
xling <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/xling/RC_xling_clean_WITH_checks.csv"
)
# LF complexity norms
norms.lf <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/Experiment_26/Analysis/complexity_word_norms.csv"
)
# Brysbaert concreteness
norms.b <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/brysbaert_database/brysbaert_corpus.csv",
  header = TRUE
)
# MRC
norms.mrc <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/MRC_database/MRC_corpus.csv"
)
# SUBTLEXus frequency
freqs <- read.table(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/MRC_database/SUBTLEXusDataBase.txt",
  header = TRUE
)

merge norms to xling df

# Each norm set is attached to xling by looking up the English word in the
# norm table; words without a match get NA.

# LF complexity norms
index <- match(xling$ENGLISH, norms.lf$word)
xling$lf.complex <- norms.lf$complexity[index]

# Brysbaert concreteness
index <- match(xling$ENGLISH, norms.b$Word)
xling$b.conc <- norms.b$Conc.M[index]

# MRC: pull the matched rows once, then copy the five measures across
index <- match(xling$ENGLISH, norms.mrc$word)
mrc_rows <- norms.mrc[index, ]
xling$mrc.fam <- mrc_rows$mrc.fam
xling$mrc.conc <- mrc_rows$mrc.conc
xling$mrc.imag <- mrc_rows$mrc.imag
xling$mrc.phon <- mrc_rows$mrc.phon
xling$mrc.syl <- mrc_rows$mrc.syl

# Frequency (Lg10WF column of the SUBTLEXus table)
index <- match(xling$ENGLISH, freqs$Word)
xling$log.E.freq <- freqs$Lg10WF[index]

Word class distribution

# Count the corpus words in each word class and plot the counts.
xling$Open_class <- as.factor(xling$Open_class)

# Labels are attached by class code rather than by position, so an absent
# level or NA codes can neither shift the labels nor make the assignment
# fail on a length mismatch. Elsewhere in this analysis code 0 is excluded
# to get open-class words and code 1 is used for open-class bare words;
# code 2 is taken to be open-class inflected, as in the original label order.
class_labels <- c(
  "0" = "closed class",
  "1" = "open class bare",
  "2" = "open class inflected"
)
class_counts <- table(xling$Open_class) # NA codes are not tabulated
class_codes <- names(class_counts)
class_names <- unname(class_labels[class_codes])
# An unexpected code is shown as-is instead of being mislabelled
unknown <- is.na(class_names)
class_names[unknown] <- class_codes[unknown]

counts <- data.frame(
  freq = as.vector(class_counts),
  class = class_names,
  stringsAsFactors = FALSE
)

ggplot(counts, aes(class, freq, fill = class)) +
  geom_bar(stat = "identity") +
  ggtitle("Word types in corpus")

plot of chunk unnamed-chunk-4

(A) Correlation between all lengths

# Indices of the length columns (column names containing "LEN")
lens <- grep("LEN", names(xling))
# Blue-white-red palette generator shared by the correlation plots
col1 <- colorRampPalette(c("blue", "white", "red"))

## Correlations between all lengths, all words
xling_len <- xling[, lens]
# Rename each column to the lower-cased text before its first "_"
name_parts <- str_split(names(xling_len), "_")
names(xling_len) <- tolower(
  vapply(name_parts, function(parts) parts[1], character(1), USE.NAMES = FALSE)
)

# Pairwise correlation matrix of the lengths, plotted in first-principal-component order
cmat <- cor(xling_len, use = "pairwise.complete.obs")
corrplot(
  cmat,
  method = "color", order = "FPC",
  col = col1(100),
  tl.cex = .5, tl.srt = 45, tl.col = "black"
)

plot of chunk unnamed-chunk-5

# Mean over every cell of the correlation matrix.
# NOTE(review): this includes the diagonal (each length with itself, r = 1),
# so it sits slightly above the mean over distinct language pairs, which
# would be mean(cmat[upper.tri(cmat)]).
mean(cmat)
## [1] 0.3206
## Correlations between all lengths, open class words only
open_rows <- xling$Open_class != 0
xlingO <- xling[open_rows, lens]
# Rename each column to the lower-cased text before its first "_"
name_parts <- str_split(names(xlingO), "_")
names(xlingO) <- tolower(
  vapply(name_parts, function(parts) parts[1], character(1), USE.NAMES = FALSE)
)

# Pairwise correlation matrix of the lengths, open class words only
cmat <- cor(xlingO, use = "pairwise.complete.obs")
corrplot(
  cmat,
  method = "color", order = "FPC",
  col = col1(100),
  tl.cex = .5, tl.srt = 45, tl.col = "black"
)

plot of chunk unnamed-chunk-5

# NOTE(review): averages every cell of cmat, including the r = 1 diagonal;
# mean(cmat[upper.tri(cmat)]) would average the distinct pairs only.
mean(cmat)
## [1] 0.2897

(B) Correlation between all lengths, controlling for frequency, open class only

## open class words only
# Length columns plus log frequency (selected last, so it is the final column)
xlingOF <- xling[xling$Open_class != 0, c(lens, which(names(xling) == "log.E.freq"))]
# Rename each column to the lower-cased text before its first "_";
# "log.E.freq" has no "_" and therefore becomes "log.e.freq"
names(xlingOF) <- as.character(tolower(lapply(str_split(names(xlingOF), "_"), function(x) {
  x[1]
})))

# Partial correlations between all lengths, open class only, with frequency
# partialled out. The column positions are looked up by name instead of being
# hard-coded as 1:81 and 82, so the call stays correct if the number of
# length columns changes.
freq_col <- which(names(xlingOF) == "log.e.freq")
len_cols <- setdiff(seq_along(xlingOF), freq_col)
cmat.p <- partial.r(xlingOF, len_cols, freq_col)
# sorted by first principal component
corrplot(cmat.p, tl.cex = .5, tl.srt = 45, order = "FPC", method = "color", tl.col = "black", col = col1(100))

plot of chunk unnamed-chunk-6

# Same partial-correlation matrix, ordered by the angular order of the eigenvectors
corrplot(
  cmat.p,
  method = "color", order = "AOE",
  col = col1(100),
  tl.cex = .5, tl.srt = 45, tl.col = "black"
)

plot of chunk unnamed-chunk-6

# sorted by hierarchical clustering
# The ordering is spelled out in full ("hclust"); the abbreviated "hclus"
# only worked through partial matching of the argument's allowed values.
corrplot(cmat.p, tl.cex = .5, tl.srt = 45, order = "hclust", method = "color", tl.col = "black", col = col1(100))

plot of chunk unnamed-chunk-6

# NOTE(review): averages every cell of cmat.p, including the diagonal of 1s;
# mean(cmat.p[upper.tri(cmat.p)]) would average the distinct pairs only.
mean(cmat.p)
## [1] 0.2201

(C) Correlation between all lengths and complexity, controlling for frequency

### Per-language bar plot of Pearson's r between word length and the
### complexity norms, with confidence-interval line ranges (bootstrapped,
### per the original note). Triangles mark p.corr — presumably the
### correlation with frequency partialled out; confirm where c_l is built.
ggplot(c_l, aes(language, corr)) +
  geom_bar(stat = "identity", fill = "red") +
  geom_linerange(aes(ymax = upper.ci, ymin = lower.ci)) +
  geom_point(
    data = c_l,
    mapping = aes(x = language, y = p.corr),
    size = 2, shape = 17
  ) +
  labs(
    x = "Language",
    y = "Pearson's r",
    title = "Correlation between word length and complexity norms"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

plot of chunk unnamed-chunk-8

#verify correlations by looking at English
partial.r(xlingOC,c(1,which(names(xlingOC) == "lf.complex")), which(names(xlingOC) == "log.e.freq"))
## partial correlations 
##            english lf.complex
## english       1.00       0.57
## lf.complex    0.57       1.00
cor(xlingOC$english,xlingOC$lf.complex, use = "pairwise")
## [1] 0.6387
# mean correlation
mean(c_l$corr)
## [1] 0.3089

(D) Translation checking data

# Translation-check file, first 500 rows only
checksR <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/xling/translation_accuracy.csv"
)[1:500, ]

# Attach each checked word's class code from xling
index <- match(checksR$ENGLISH, xling$ENGLISH)
checksR$class <- xling$Open_class[index]
# Use the English word as the row name, then drop the column
row.names(checksR) <- checksR$ENGLISH
checksR$ENGLISH <- NULL
# Open_class is a factor here: go through character to recover the numeric codes
checksR$class <- as.numeric(as.character(checksR$class))
# Accuracy for each of the first 12 columns: column sum over number of rows
accuracy <- colSums(checksR[1:12], dims = 1) / nrow(checksR)
mean(accuracy)
## [1] 0.919
# One row per checked column, with the column name kept as `lang`
dfa <- as.data.frame(accuracy)
dfa$lang <- row.names(dfa)

plot accuracy

# Bar plot of translation-check accuracy, one bar per language
ggplot(dfa, aes(lang, accuracy, fill = lang)) +
  geom_bar(stat = "identity") +
  labs(x = "Language", title = "Google Translate Check Accuracy") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

plot of chunk unnamed-chunk-11

# Accuracy restricted to open-class words (class code other than 0)
open_checks <- checksR[checksR$class != 0, ]
accuracy_open <- colSums(open_checks[, 1:12], dims = 1) / nrow(open_checks)
mean(accuracy_open)
## [1] 0.922
# Accuracy restricted to words with class code 1
bare_checks <- checksR[checksR$class == 1, ]
accuracy_open_bare <- colSums(bare_checks[, 1:12], dims = 1) / nrow(bare_checks)
mean(accuracy_open_bare)
## [1] 0.923

ISSUES

(2) High frequency words in mapping task (Task)

read in data and prep variables

merge in stuff

get quintiles

aggregate by word

# Per-word summary of LongBet: mean, number of workers, and confidence interval.
#
# All three summaries use the same grouping variables. aggregate() orders its
# output rows by the grouping variables (the last one varying slowest), so a
# summary grouped without `quintile` comes back in a different row order and
# its values would be attached to the wrong words.
ms <- aggregate(LongBet ~ engWord + norms.lf + l.freq + norms.lf_ci + quintile, data = d, mean)

# Workers are counted from the workerid column itself: a `LongBet ~ ...`
# aggregate has no `workerid` column, so `$workerid` on it is NULL and the
# assignment would silently do nothing. Rows without a bet are dropped first
# so the groups are the same ones that make up `ms`.
ms$n <- aggregate(
  workerid ~ engWord + norms.lf + l.freq + norms.lf_ci + quintile,
  data = d[!is.na(d$LongBet), ],
  n.unique
)$workerid

ms$bet_ci <- aggregate(LongBet ~ engWord + norms.lf + l.freq + norms.lf_ci + quintile, data = d, ci95)$LongBet

plot bet to long word vs. complexity norms

# Per-word mean bet against complexity norm, with error bars on both axes.
# The annotation is the word-level correlation, rounded to two decimals.
r_label <- paste("r=", round(cor(ms$norms.lf, ms$LongBet, use = "complete"), 2))

ggplot(ms, aes(norms.lf, LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbarh(
    aes(xmin = norms.lf - norms.lf_ci, xmax = norms.lf + norms.lf_ci),
    size = 0.2, colour = "grey"
  ) +
  geom_errorbar(
    aes(ymin = LongBet - bet_ci, ymax = LongBet + bet_ci),
    size = 0.2, colour = "grey"
  ) +
  annotate("text", x = 6, y = 25, label = r_label) +
  # `q` is defined outside this chunk — presumably the quintile cut points; confirm
  geom_vline(xintercept = q, col = "red") +
  labs(
    x = "Complexity Norms",
    y = "Bet to Long Word",
    title = "High Frequency meanings (words)"
  )

plot of chunk unnamed-chunk-16

correlation between norms and length

# correlation between norms and bets to long word (all)
cor.test(d$LongBet,d$norms.lf)
## 
##  Pearson's product-moment correlation
## 
## data:  d$LongBet and d$norms.lf
## t = 4.229, df = 1998, p-value = 2.459e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.05056 0.13744
## sample estimates:
##     cor 
## 0.09418
bm.partial(d$LongBet,d$norms.lf, d$l.freq )
## [1] 0.0904
#partial.r(d[,c(4,8,10)],c(1,2),3 )

# correlation between norms and bets to long word (aggregated across words)
cor.test(ms$LongBet,ms$norms.lf)
## 
##  Pearson's product-moment correlation
## 
## data:  ms$LongBet and ms$norms.lf
## t = 2.998, df = 94, p-value = 0.003476
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1009 0.4682
## sample estimates:
##    cor 
## 0.2954
bm.partial(ms$LongBet,ms$norms.lf, ms$l.freq )
## [1] 0.3163
summary(lmer(LongBet ~ norms.lf + l.freq + (1|trial) + (1|workerid), d))
## Linear mixed model fit by REML ['lmerMod']
## Formula: LongBet ~ norms.lf + l.freq + (1 | trial) + (1 | workerid)
##    Data: d
## 
## REML criterion at convergence: 17981
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2832 -0.7451  0.0176  0.7584  2.3186 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept)  15.4     3.93   
##  trial    (Intercept)   1.6     1.26   
##  Residual             634.9    25.20   
## Number of obs: 1931, groups: workerid, 200; trial, 10
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   53.188      3.037   17.51
## norms.lf       2.779      0.660    4.21
## l.freq        -3.860      0.624   -6.19
## 
## Correlation of Fixed Effects:
##          (Intr) nrms.l
## norms.lf -0.728       
## l.freq   -0.665  0.035

plot by quintiles

# Aggregate by quintile: mean bet, number of workers, and confidence interval
ms <- aggregate(LongBet ~ quintile, data = d, mean)
# Workers are counted from the workerid column itself: a `LongBet ~ quintile`
# aggregate has no `workerid` column, so `$workerid` on it is NULL and `n`
# would never be created. Rows without a bet are dropped first so the groups
# match those in `ms`.
ms$n <- aggregate(workerid ~ quintile, data = d[!is.na(d$LongBet), ], n.unique)$workerid
ms$bet_ci <- aggregate(LongBet ~ quintile, data = d, ci95)$LongBet

# Quintile means with error bars; the annotation is the correlation across the quintile means
ggplot(ms, aes(quintile, LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(aes(ymin = LongBet - bet_ci, ymax = LongBet + bet_ci), size = 0.2, colour = "black") +
  annotate("text", x = 5, y = 25, label = paste("r=", round(cor(ms$quintile, ms$LongBet, use = "complete"), 2))) +
  scale_y_continuous(limits = c(20, 80)) +
  #scale_x_continuous(limits = c(0, 7), breaks = 1:7, labels = 1:7)  +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15)) +
  xlab("Complexity Norm quintile") +
  ylab("Bet to Long Word") +
  ggtitle("High Frequency meanings (words)")

plot of chunk unnamed-chunk-18

correlations with quintiles

# correlation between norms quintiles and bets to long word 
cor.test(d$quintile, d$LongBet)
## 
##  Pearson's product-moment correlation
## 
## data:  d$quintile and d$LongBet
## t = 3.808, df = 1998, p-value = 0.0001443
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04121 0.12824
## sample estimates:
##     cor 
## 0.08489
# correlation between norms quintiles and bets to long word (aggregated across words)
cor.test(ms$quintile, ms$LongBet)
## 
##  Pearson's product-moment correlation
## 
## data:  ms$quintile and ms$LongBet
## t = 2.315, df = 3, p-value = 0.1035
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2778  0.9862
## sample estimates:
##    cor 
## 0.8007

residual quintiles (controls for frequency)

get quintiles

###Plot

# Aggregate by residual quintile: mean bet, number of workers, and confidence interval
ms.qr <- aggregate(LongBet ~ resid.quintile, data = d, mean)
# Workers are counted from the workerid column itself: a `LongBet ~ ...`
# aggregate has no `workerid` column, so `$workerid` on it is NULL and `n`
# would never be created. Rows without a bet are dropped first so the groups
# match those in `ms.qr`.
ms.qr$n <- aggregate(workerid ~ resid.quintile, data = d[!is.na(d$LongBet), ], n.unique)$workerid
ms.qr$bet_ci <- aggregate(LongBet ~ resid.quintile, data = d, ci95)$LongBet

# The annotated r is computed from the aggregated quintile means that are
# actually plotted (as in the plain-quintile plot), not from the trial-level
# data in `d`, so the label describes the points and fitted line on the figure.
ggplot(ms.qr, aes(resid.quintile, LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(aes(ymin = LongBet - bet_ci, ymax = LongBet + bet_ci), size = 0.2, colour = "black") +
  annotate("text", x = 5, y = 25, label = paste("r=", round(cor(ms.qr$resid.quintile, ms.qr$LongBet, use = "complete"), 2))) +
  scale_y_continuous(limits = c(20, 80)) +
  #scale_x_continuous(limits = c(0, 7), breaks = 1:7, labels = 1:7)  +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15)) +
  xlab("RESIDUAL Complexity Norm quintile") +
  ylab("Bet to Long Word") +
  ggtitle("High Frequency meanings (words)")

plot of chunk unnamed-chunk-21

residual quintiles correlations

cor.test(d$resid.quintile, d$LongBet) ### highly correlated
## 
##  Pearson's product-moment correlation
## 
## data:  d$resid.quintile and d$LongBet
## t = 3.96, df = 1910, p-value = 7.768e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.0456 0.1345
## sample estimates:
##     cor 
## 0.09024
cor.test(ms.qr$resid.quintile, ms.qr$LongBet) 
## 
##  Pearson's product-moment correlation
## 
## data:  ms.qr$resid.quintile and ms.qr$LongBet
## t = 2.512, df = 3, p-value = 0.08678
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2156  0.9879
## sample estimates:
##    cor 
## 0.8233

(3) Novel real objects

(A) Norms Complexity norming task RT task

read in complexity norms and RT norms for objects

# Complexity norms for the objects
c_norms <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/Experiment_9_norm/complicated1AND2_norms.csv"
)
# RT norms for the objects
rt_norms <- read.csv(
  "/Documents/GRADUATE_SCHOOL/Projects/ref_complex/Experiment_30/Analysis/rt_norms.csv"
)

(B) Mapping task (adults) (Task)

read in data and format

make everything factors

merge in norms

get effect sizes

get obj conds

ratio plots

# Effect size against object ratio, one labelled point per object condition.
# NOTE(review): ymax is mapped to `cill` and ymin to `ciul`; if those names
# mean lower/upper limit they are swapped, which still draws the same
# line range — confirm.
obj_r_label <- paste("r=", round(cor(de$effect_size, de$objRatio, use = "complete"), 2))

ggplot(de, aes(y = effect_size, x = objRatio)) +
  geom_pointrange(aes(ymax = cill, ymin = ciul)) +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  # labels are nudged right of their points
  geom_text(aes(objRatio + .03, effect_size, label = objCondition)) +
  annotate("text", x = .3, y = -.2, col = "red", label = obj_r_label) +
  labs(
    x = "object ratio",
    y = "effect size",
    title = "Object ratio vs. effect size"
  ) +
  theme(text = element_text(size = 20), plot.title = element_text(size = 20))

plot of chunk unnamed-chunk-29

# Effect size against complexity ratio (c.Mratio), one labelled point per
# object condition, with the correlation annotated in red.
ggplot(de, aes(y = effect_size, x = c.Mratio)) +
  geom_pointrange(aes(ymax = cill, ymin = ciul), position = "dodge") +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  # position = "dodge" is kept as in the original; this text layer has no
  # ymin/ymax, which is what triggers ggplot2's "ymax not defined" message.
  geom_text(aes(c.Mratio + .02, effect_size, label = objCondition), position = "dodge") +
  ylab("effect size") +
  # The x axis shows c.Mratio, so it is labelled as the complexity ratio
  # (it previously read "object ratio", copied from the object-ratio plot).
  xlab("complexity ratio") +
  ggtitle("complexity ratio vs. effect size") +
  theme(text = element_text(size = 20), plot.title = element_text(size = 20)) +
  annotate("text", x = .5, y = -.2, col = "red", label = paste("r=", round(cor(de$effect_size, de$c.Mratio, use = "complete"), 2)))
## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-29

# Effect size against RT ratio, one labelled point per object condition
rt_r_label <- paste("r=", round(cor(de$effect_size, de$rt.Mratio, use = "complete"), 2))

ggplot(de, aes(y = effect_size, x = rt.Mratio)) +
  geom_pointrange(aes(ymax = cill, ymin = ciul)) +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  # labels are nudged right of their points
  geom_text(aes(rt.Mratio + .0008, effect_size, label = objCondition)) +
  annotate("text", x = .985, y = -.2, col = "red", label = rt_r_label) +
  labs(
    x = "RT ratio",
    y = "effect size",
    title = "RT ratio vs. effect size"
  ) +
  theme(text = element_text(size = 20), plot.title = element_text(size = 20))

plot of chunk unnamed-chunk-29

correlations between effect size at complexity conditions

cor.test(de$objRatio, de$effect_size)
## 
##  Pearson's product-moment correlation
## 
## data:  de$objRatio and de$effect_size
## t = -3.051, df = 13, p-value = 0.009273
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8703 -0.1999
## sample estimates:
##    cor 
## -0.646
cor.test(de$c.Mratio, de$effect_size)
## 
##  Pearson's product-moment correlation
## 
## data:  de$c.Mratio and de$effect_size
## t = -3.494, df = 13, p-value = 0.00396
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8907 -0.2854
## sample estimates:
##     cor 
## -0.6959
cor.test(de$rt.Mratio, de$effect_size)
## 
##  Pearson's product-moment correlation
## 
## data:  de$rt.Mratio and de$effect_size
## t = -3.659, df = 13, p-value = 0.002887
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8972 -0.3149
## sample estimates:
##     cor 
## -0.7123

(C) Mapping task (children) (Task)

(D) Production task (labels + descriptions)

(1) Labels (Task)

read in data and prep data frame

relationship between condition and description length

# Paired t-test of log label length, "complex" vs "simple" condition.
# NOTE(review): paired = TRUE pairs the i-th "complex" row with the i-th
# "simple" row in md's current row order; confirm md is sorted so that each
# pair really is the same worker's matched trials.
t.test(md[md$condition == '"complex"',"log.length"],md[md$condition == '"simple"',"log.length"],paired = TRUE)
## 
##  Paired t-test
## 
## data:  md[md$condition == "\"complex\"", "log.length"] and md[md$condition == "\"simple\"", "log.length"]
## t = 3.735, df = 286, p-value = 0.0002269
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.03846 0.12417
## sample estimates:
## mean of the differences 
##                 0.08131
summary(lmer(log.length~condition + (1+trial|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length ~ condition + (1 + trial | workerid)
##    Data: md
## 
## REML criterion at convergence: -0.4
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.009 -0.626  0.061  0.633  2.773 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  workerid (Intercept) 1.53e-02 0.12358      
##           trial       3.40e-06 0.00184  1.00
##  Residual             4.92e-02 0.22189      
## Number of obs: 574, groups: workerid, 59
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)         1.9350     0.0218    88.7
## condition"simple"  -0.0797     0.0185    -4.3
## 
## Correlation of Fixed Effects:
##             (Intr)
## cndtn"smpl" -0.427

relationship with complicated norms

# Attach each picture's complexity norm to the label data
index <- match(md$picture, c_norms$ratingNum)
md$c.norms <- c_norms$value[index]

# Per-picture summaries; every aggregate uses the same grouping (norm + picture)
ms <- aggregate(log.length ~ c.norms + picture, data = md, FUN = mean)
ms$cih <- aggregate(log.length ~ c.norms + picture, data = md, FUN = ci.high)$log.length
ms$cil <- aggregate(log.length ~ c.norms + picture, data = md, FUN = ci.low)$log.length
ms$n <- aggregate(workerid ~ c.norms + picture, data = md, FUN = n.unique)$workerid

# Mean log label length per picture against its complexity norm
ggplot(ms, aes(c.norms, log.length)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymax = log.length + cih, ymin = log.length - cil),
    size = 0.2, colour = "grey"
  ) +
  theme_bw() +
  labs(x = "Object Complexity Norms", y = "Log Word Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))

plot of chunk unnamed-chunk-33

relationship with RT norms

# Attach each picture's RT norm (log RT) to the label data
index <- match(md$picture, rt_norms$Answer.train_image)
md$rt.norms <- rt_norms$log.rt[index]

# Per-picture summaries; every aggregate uses the same grouping (norm + picture)
ms <- aggregate(log.length ~ rt.norms + picture, data = md, mean)
ms$cih <- aggregate(log.length ~ rt.norms + picture, data = md, ci.high)$log.length
ms$cil <- aggregate(log.length ~ rt.norms + picture, data = md, ci.low)$log.length
ms$n <- aggregate(workerid ~ rt.norms + picture, data = md, n.unique)$workerid

#plot
ggplot(ms, aes(rt.norms, log.length)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(aes(ymax = log.length + cih, ymin = log.length - cil), size = 0.2, colour = "grey") +
  theme_bw() +
  # The x axis shows rt.norms, so it is labelled as RT norms (it previously
  # read "Object Complexity Norms", copied from the complexity-norm plot).
  xlab("Object RT Norms") +
  ylab("Log Word Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))

plot of chunk unnamed-chunk-34

(2) Descriptions (Task)

read in data and prep data frame

relationship between condition and description length

#summary(lmer(length_c~condition + (1|workerid), md))
#summary(lmer(length_c~condition + trial + (1+trial|workerid), md))

# Mixed model of log description length on condition, random intercept per worker.
# NOTE(review): the formula uses `md$condition` although `md` is already
# passed as the data; plain `condition` (as in the model with `trial` further
# down) refers to the same column and keeps the term name free of the `md$`
# prefix. Left unchanged so the call matches the printed output.
summary(lmer(log.length_c~md$condition + (1|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ md$condition + (1 | workerid)
##    Data: md
## 
## REML criterion at convergence: 863.1
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.004 -0.616  0.071  0.599  4.269 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 0.476    0.690   
##  Residual             0.175    0.418   
## Number of obs: 600, groups: workerid, 60
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)            3.3545     0.0922    36.4
## md$condition"simple"  -0.1128     0.0342    -3.3
## 
## Correlation of Fixed Effects:
##             (Intr)
## md$cndtn"s" -0.185
summary(lmer(log.length_c~condition + trial + (1+trial|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ condition + trial + (1 + trial | workerid)
##    Data: md
## 
## REML criterion at convergence: 823.1
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.185 -0.559  0.071  0.594  3.841 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  workerid (Intercept) 0.43168  0.6570        
##           trial       0.00177  0.0421   -0.02
##  Residual             0.14949  0.3866        
## Number of obs: 600, groups: workerid, 60
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)        3.54229    0.09310    38.0
## condition"simple" -0.12772    0.03260    -3.9
## trial             -0.03278    0.00773    -4.2
## 
## Correlation of Fixed Effects:
##             (Intr) cndt""
## cndtn"smpl" -0.189       
## trial       -0.252  0.031
## plot
ggplot(md, aes(x=log.length_c, fill=condition)) + geom_density(alpha = 0.2)

plot of chunk unnamed-chunk-36

correlations with complexity norms

index <- match(md$picture, c_norms$ratingNum)
md$c.norms <- c_norms$value[index]

summary(lmer(log.length_c~c.norms + (1+trial|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ c.norms + (1 + trial | workerid)
##    Data: md
## 
## REML criterion at convergence: 826.8
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.272 -0.551  0.069  0.587  3.893 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  workerid (Intercept) 0.44295  0.6655        
##           trial       0.00275  0.0524   -0.11
##  Residual             0.14874  0.3857        
## Number of obs: 600, groups: workerid, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.2392     0.0944    34.3
## c.norms       0.2921     0.0691     4.2
## 
## Correlation of Fixed Effects:
##         (Intr)
## c.norms -0.346
summary(lmer(log.length_c~c.norms + trial + (1|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ c.norms + trial + (1 | workerid)
##    Data: md
## 
## REML criterion at convergence: 836
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.062 -0.625  0.060  0.597  4.042 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 0.477    0.690   
##  Residual             0.165    0.406   
## Number of obs: 600, groups: workerid, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.34180    0.10143    32.9
## c.norms      0.28220    0.07006     4.0
## trial       -0.03248    0.00577    -5.6
## 
## Correlation of Fixed Effects:
##         (Intr) c.nrms
## c.norms -0.322       
## trial   -0.304 -0.027
# complexity norms predict length

cor.test(md$log.length_c,md$c.norms)
## 
##  Pearson's product-moment correlation
## 
## data:  md$log.length_c and md$c.norms
## t = 1.965, df = 598, p-value = 0.04993
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  3.457e-05 1.591e-01
## sample estimates:
##     cor 
## 0.08008

complexity norms plot

# Per-picture summaries of log description length; every aggregate uses the
# same grouping (norm + picture)
ms <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = mean)
ms$cih <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = ci.high)$log.length_c
ms$cil <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = ci.low)$log.length_c
ms$n <- aggregate(workerid ~ c.norms + picture, data = md, FUN = n.unique)$workerid

# Mean log description length per picture against its complexity norm
ggplot(ms, aes(c.norms, log.length_c)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymax = log.length_c + cih, ymin = log.length_c - cil),
    size = 0.2, colour = "grey"
  ) +
  theme_bw() +
  labs(x = "Object Complexity Norms", y = "Log Description Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))

plot of chunk unnamed-chunk-38

correlations with RT norms

index <- match(md$picture, rt_norms$Answer.train_image)
md$rt.norms <- rt_norms$log.rt[index]

summary(lmer(log.length_c~rt.norms + (1+trial|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ rt.norms + (1 + trial | workerid)
##    Data: md
## 
## REML criterion at convergence: 825.6
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.306 -0.589  0.083  0.590  3.705 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  workerid (Intercept) 0.43683  0.6609        
##           trial       0.00277  0.0527   -0.08
##  Residual             0.14889  0.3859        
## Number of obs: 600, groups: workerid, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   -2.262      1.373   -1.65
## rt.norms       0.770      0.187    4.12
## 
## Correlation of Fixed Effects:
##          (Intr)
## rt.norms -0.998
summary(lmer(log.length_c~rt.norms + trial + (1|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ rt.norms + trial + (1 | workerid)
##    Data: md
## 
## REML criterion at convergence: 836.3
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.119 -0.587  0.079  0.615  3.803 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 0.481    0.693   
##  Residual             0.165    0.406   
## Number of obs: 600, groups: workerid, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -1.71467    1.39726   -1.23
## rt.norms     0.70745    0.19008    3.72
## trial       -0.03215    0.00578   -5.57
## 
## Correlation of Fixed Effects:
##          (Intr) rt.nrm
## rt.norms -0.998       
## trial    -0.009 -0.014
#rt norms predict length

cor.test(md$log.length_c,md$rt.norms)
## 
##  Pearson's product-moment correlation
## 
## data:  md$log.length_c and md$rt.norms
## t = 0.848, df = 598, p-value = 0.3968
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.04551  0.11438
## sample estimates:
##     cor 
## 0.03466

rt norms plot

# Per-picture summaries of log description length; every aggregate uses the
# same grouping (norm + picture)
ms <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = mean)
ms$cih <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = ci.high)$log.length_c
ms$cil <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = ci.low)$log.length_c
ms$n <- aggregate(workerid ~ rt.norms + picture, data = md, FUN = n.unique)$workerid

# Mean log description length per picture against its RT norm
ggplot(ms, aes(rt.norms, log.length_c)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    aes(ymax = log.length_c + cih, ymin = log.length_c - cil),
    size = 0.2, colour = "grey"
  ) +
  theme_bw() +
  labs(x = "Object RT Norms", y = "Log Description Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))

plot of chunk unnamed-chunk-40

# reliable when control for random effects

(4) Geons

(A) Norms Complexity (Task) RT task

(B) Mapping task (Task)