Referential Complexity Analyses

M. Lewis

August 04, 2014



Analyses:

  1. Cross-linguistic analyses
    (A) Complexity Norms
    (B) Correlation between all lengths
    (C) Correlation between all lengths, controlling for frequency, open class only
    (D) Correlation between all lengths and complexity, controlling for frequency
    (E) Translation check data

  2. High frequency words in mapping task

  3. Novel real objects
    (A) Norms
    (B) Mapping task (adults) TO DO
    (C) Mapping task (children) TO DO
    (D) Production task (labels + descriptions) TO DO

  4. Geons
    (A) Norms
    (B) Mapping task TO DO

TO DO: figure out how to clear before starting a new experiment; save to GitHub; clean up so that only plots and critical statistical results are shown; check that all experiments remove duplicates.



Set global variables

# Global switches for this run of the analyses.
processNorms <- TRUE      # process the norms, or load them instead?
removeRepeatSubj <- TRUE  # drop repeat subjects?
savePlots <- FALSE        # also write plots to PDF?

LOAD PACKAGES, FUNCTIONS, AND REPEAT SUBJ DATA FILE



(1) Cross-linguistic analyses (Complexity norms task)

(A) Norms

(B) Correlation between all lengths

read in data and merge with English complexity norms

# Load the cross-linguistic word list and attach the English complexity norms,
# joining the list's ENGLISH column to the norms' `word` column.
xling <- merge(
  read.csv("data/xling_csv.csv"),
  englishComplexityNorms,
  by.x = "ENGLISH",
  by.y = "word"
)

# Drop the one bad item ("peso").
xling <- xling[!(xling$ENGLISH == "peso"), ]

word class distribution

# Count the words in each word class and show the counts as a bar chart.
# The three labels are assigned in the order of the factor levels of Open_class.
xling$Open_class <- as.factor(xling$Open_class)
counts <- as.data.frame(summary(xling$Open_class))
counts[["class"]] <- c("closed class", "open class bare", "open class inflected")
colnames(counts) <- c("freq", "class")

ggplot(data = counts, mapping = aes(x = class, y = freq, fill = class)) +
  geom_bar(stat = "identity") +
  ggtitle("Word types in corpus")

plot of chunk unnamed-chunk-6

# Indices of the length columns (every column whose name contains "LEN").
lens <- grep("LEN", names(xling))
# Blue-white-red colour ramp used by the correlation plots.
col1 <- colorRampPalette(c("blue", "white", "red"))

## Correlations between all lengths, all words
xling_len <- xling[, lens]
# Label each column with the lower-cased text before its first underscore.
names(xling_len) <- tolower(
  vapply(
    str_split(names(xling_len), "_"),
    function(parts) parts[1],
    character(1)
  )
)

# Correlations between all lengths (pairwise-complete observations)
cmat <- cor(xling_len, use = "pairwise.complete.obs")
corrplot(
  cmat,
  method = "color",
  col = col1(100),
  order = "FPC",
  tl.cex = .5,
  tl.srt = 45,
  tl.col = "black"
)

plot of chunk unnamed-chunk-7

# Average over every entry of the correlation matrix.
# NOTE(review): cmat comes from cor(), so this average includes the diagonal
# (each column with itself) and counts every pair twice -- confirm intended.
mean(cmat)
## [1] 0.3213
## Correlations between all lengths, open class words only
# Keep the length columns for rows whose Open_class is not 0.
xlingO <- xling[xling$Open_class != 0, lens]
names(xlingO) <- tolower(
  vapply(
    str_split(names(xlingO), "_"),
    function(parts) parts[1],
    character(1)
  )
)

# Correlations between all lengths (pairwise-complete observations)
cmat <- cor(xlingO, use = "pairwise.complete.obs")
corrplot(
  cmat,
  method = "color",
  col = col1(100),
  order = "FPC",
  tl.cex = .5,
  tl.srt = 45,
  tl.col = "black"
)

plot of chunk unnamed-chunk-7

# Average over every entry of the open-class correlation matrix.
# NOTE(review): cmat comes from cor(), so this average includes the diagonal
# (each column with itself) and counts every pair twice -- confirm intended.
mean(cmat)
## [1] 0.2876

(C) Correlation between all lengths, controlling for frequency

## all words
# Length columns followed by log frequency, which ends up as the last column.
xling_len_p <- xling[, c(lens, which(names(xling) == "log.e.freq"))]
# Label each column with the lower-cased text before its first underscore
# ("log.e.freq" has no underscore and is left unchanged).
names(xling_len_p) <- tolower(sub("_.*$", "", names(xling_len_p)))

# Partial correlations between all lengths (all words), with log frequency
# partialled out. The column positions are derived from `lens` instead of being
# hard-coded as 1:80 and 81, so the call stays correct if the number of length
# columns changes.
cmat.p <- partial.r(xling_len_p, seq_along(lens), length(lens) + 1)
# NOTE(review): this average runs over the whole matrix, diagonal included.
mean(cmat.p)
## [1] 0.216
## open class words only
# Length columns followed by log frequency (last column), for rows whose
# Open_class is not 0.
xlingOF <- xling[xling$Open_class != 0, c(lens, which(names(xling) == "log.e.freq"))]
# Label each column with the lower-cased text before its first underscore.
names(xlingOF) <- tolower(sub("_.*$", "", names(xlingOF)))

# Partial correlations between all lengths, open class only, with log frequency
# partialled out. Column positions are derived from `lens` rather than
# hard-coded as 1:80 and 81.
cmat.p <- partial.r(xlingOF, seq_along(lens), length(lens) + 1)

# sorted by first principal component
if (savePlots) {
  pdf("sort.pdf", height = 10, width = 10)
}
corrplot(
  cmat.p,
  tl.cex = .5,
  tl.srt = 45,
  order = "FPC",
  method = "color",
  tl.col = "black",
  col = col1(100)
)

plot of chunk unnamed-chunk-8

# Close the PDF device if one was opened for the previous plot.
if (savePlots) {
  dev.off()
}
# Same partial-correlation matrix, sorted by angular order of the eigenvectors.
corrplot(
  cmat.p,
  method = "color",
  order = "AOE",
  col = col1(100),
  tl.cex = .5,
  tl.srt = 45,
  tl.col = "black"
)

plot of chunk unnamed-chunk-8

# Same partial-correlation matrix, sorted by hierarchical clustering.
# The ordering name is spelled out in full ("hclust"); the abbreviated "hclus"
# only worked through partial matching of the option name.
corrplot(
  cmat.p,
  tl.cex = .5,
  tl.srt = 45,
  order = "hclust",
  method = "color",
  tl.col = "black",
  col = col1(100)
)

plot of chunk unnamed-chunk-8

# Average over every entry of the open-class partial-correlation matrix.
# NOTE(review): the average runs over the whole matrix, so it includes the
# diagonal and counts every pair twice -- confirm intended.
mean(cmat.p)
## [1] 0.2201

(D) Correlation between lengths and complexity, open class only, controlling for frequency

# Versions of the correlation and its CI bounds that are kept only for
# languages whose Checked value is "yes"; all other languages are set to 0.
c_l[["checked_only"]] <- ifelse(test = c_l$Checked == "yes", yes = c_l$corr, no = 0)
c_l[["uci"]] <- ifelse(test = c_l$Checked == "yes", yes = c_l$upper.ci, no = 0)
c_l[["lci"]] <- ifelse(test = c_l$Checked == "yes", yes = c_l$lower.ci, no = 0)

### Figure p0: axes only. Every bar is drawn at height 0 (y = 0); the CI line
### ranges, partial-correlation points, mean line, title and mean annotation
### are all left out of this version.
if (savePlots) {
  pdf("figure/p0.pdf", width = 10, height = 6)
}
ggplot(data = c_l, mapping = aes(x = language, y = 0, fill = Checked)) +
  geom_bar(stat = "identity") +
  ylab("Pearson's r") +
  xlab("Language") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(
    axis.title.x = element_text(size = 25),
    axis.text.x = element_text(size = 10),
    axis.title.y = element_text(size = 25),
    axis.text.y = element_text(size = 10)
  ) +
  theme(
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  scale_fill_manual(values = c("pink", "red")) +
  theme(legend.position = "none") +
  scale_y_continuous(limits = c(-.07, .7))

plot of chunk unnamed-chunk-10

# Close the PDF device if one was opened for the previous figure.
if (savePlots) {
  dev.off()
}

### Figure p1: bars and CI line ranges for checked languages only
### (checked_only, uci and lci are 0 for every other language).
if (savePlots) {
  pdf("figure/p1.pdf", width = 10, height = 6)
}
ggplot(data = c_l, mapping = aes(x = language, y = checked_only, fill = Checked)) +
  geom_bar(stat = "identity") +
  ylab("Pearson's r") +
  xlab("Language") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_linerange(mapping = aes(ymin = lci, ymax = uci)) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(
    axis.title.x = element_text(size = 25),
    axis.text.x = element_text(size = 10),
    axis.title.y = element_text(size = 25),
    axis.text.y = element_text(size = 10)
  ) +
  theme(
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  scale_fill_manual(values = c("pink", "red")) +
  theme(legend.position = "none") +
  scale_y_continuous(limits = c(-.07, .7))

plot of chunk unnamed-chunk-10

# Close the PDF device if one was opened for the previous figure.
if (savePlots) {
  dev.off()
}

### Figure p2: Pearson's r per language with CI line ranges (upper.ci /
### lower.ci) and a dashed line at the mean correlation.
if (savePlots) {
  pdf("figure/p2.pdf", width = 10, height = 6)
}

ggplot(c_l, aes(language, corr, fill = Checked)) +
  geom_bar(stat = "identity") +
  ylab("Pearson's r") +
  xlab("Language") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_linerange(aes(ymax = upper.ci, ymin = lower.ci)) +
  # geom_hline() takes its position as `yintercept` (as the other geom_hline()
  # calls in this file do); the original passed it as `y`.
  geom_hline(yintercept = mean(c_l$corr), lty = 2) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(
    axis.title.x = element_text(size = 25),
    axis.text.x = element_text(size = 10),
    axis.title.y = element_text(size = 25),
    axis.text.y = element_text(size = 10)
  ) +
  theme(
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  scale_fill_manual(values = c("pink", "red")) +
  theme(legend.position = "none") +
  scale_y_continuous(limits = c(-.07, .7))

plot of chunk unnamed-chunk-10

# Close the PDF device if one was opened for the previous figure.
if (savePlots) {
  dev.off()
}

### Figure p3: as p2, plus a triangle per language at its p.corr value.
if (savePlots) {
  pdf("figure/p3.pdf", width = 10, height = 6)
}
ggplot(c_l, aes(language, corr, fill = Checked)) +
  geom_bar(stat = "identity") +
  ylab("Pearson's r") +
  xlab("Language") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_linerange(aes(ymax = upper.ci, ymin = lower.ci)) +
  geom_point(data = c_l, mapping = aes(x = language, y = p.corr), size = 2, shape = 17) +
  # geom_hline() takes its position as `yintercept` (as the other geom_hline()
  # calls in this file do); the original passed it as `y`.
  geom_hline(yintercept = mean(c_l$corr), lty = 2) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(
    axis.title.x = element_text(size = 25),
    axis.text.x = element_text(size = 10),
    axis.title.y = element_text(size = 25),
    axis.text.y = element_text(size = 10)
  ) +
  theme(
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  scale_fill_manual(values = c("pink", "red")) +
  theme(legend.position = "none") +
  scale_y_continuous(limits = c(-.07, .7))

plot of chunk unnamed-chunk-10

# Close the PDF device if one was opened for the previous figure.
if (savePlots) {
  dev.off()
}

# Verify the correlations by looking at English: partial correlation between
# column 1 of xlingOC and `complexity`, with `log.e.freq` partialled out.
partial.r(
  xlingOC,
  c(1, which(names(xlingOC) == "complexity")),
  which(names(xlingOC) == "log.e.freq")
)
## partial correlations 
##            english complexity
## english       1.00       0.55
## complexity    0.55       1.00
# Raw English length-complexity correlation. `use` is spelled out in full, as
# in the other cor() calls in this file, instead of the abbreviation "pairwise".
cor(xlingOC$english, xlingOC$complexity, use = "pairwise.complete.obs")
## [1] 0.6328
# mean correlation
mean(c_l$corr)
## [1] 0.3089

(E) Translation checking data

# Translation-check data: the first 500 rows of the accuracy spreadsheet.
# NOTE(review): this is an absolute path, unlike the relative "data/..." path
# used for xling -- confirm it resolves wherever this is run.
checksR <- read.csv("/Documents/GRADUATE_SCHOOL/Projects/ref_complex/corpus/xling/translation_accuracy.csv")
checksR <- checksR[1:500, ]

# Copy each word's class over from xling, then move the word into the row names.
index <- match(checksR$ENGLISH, xling$ENGLISH)
checksR[["class"]] <- xling$Open_class[index]
row.names(checksR) <- checksR$ENGLISH
checksR[["ENGLISH"]] <- NULL
checksR[["class"]] <- as.numeric(as.character(checksR$class))
# Per-column sum over the first 12 columns, divided by the number of rows.
accuracy <- colSums(checksR[1:12], dims = 1) / nrow(checksR)
mean(accuracy)
## [1] 0.919
dfa <- as.data.frame(accuracy)
dfa[["lang"]] <- row.names(dfa)

plot accuracy

plot of chunk unnamed-chunk-13

## [1] 0.919
## [1] NA
## [1] NA


(2) High frequency words in mapping task (Task)

read in data and prep variables

merge in stuff

get quintiles

aggregate by word

# Per-word mean bet on the long word, keeping each word's norm covariates,
# plus the ci.low / ci.high summaries of the same bets.
# All three aggregate() calls use the same grouping formula. aggregate() orders
# (and NA-filters) its rows by the grouping variables, so grouping the CI
# summaries without `quintile`, as the original did, could return them in a
# different row order or row set and attach them to the wrong words.
ms <- aggregate(
  LongBet ~ engWord + complexity + log.e.freq + complexity_cil +
    complexity_cih + quintile,
  data = d, mean
)
ms$bet_cil <- aggregate(
  LongBet ~ engWord + complexity + log.e.freq + complexity_cil +
    complexity_cih + quintile,
  data = d, ci.low
)$LongBet
ms$bet_cih <- aggregate(
  LongBet ~ engWord + complexity + log.e.freq + complexity_cil +
    complexity_cih + quintile,
  data = d, ci.high
)$LongBet

plot bet to long word vs. complexity norms

# Per-word scatter of mean bet on the long word against the complexity norm.
# x is `complexity`: the aggregate() call that builds `ms` produces no
# `norms.lf` column, so cor(ms$norms.lf, ...) failed with "'x' must be numeric".
# The horizontal error bars (complexity_cil / complexity_cih) are on the
# `complexity` scale as well.
# NOTE(review): `q` is not defined in this section; presumably it holds the
# quintile cut points -- confirm.
# NOTE(review): bet_cil / bet_cih are used here as absolute bounds, while the
# production-task plots in this file use `value - cil` / `value + cih`; confirm
# which convention ci.low / ci.high follow.
ggplot(ms, aes(complexity, LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbarh(aes(xmin = complexity_cil, xmax = complexity_cih), size = 0.2, colour = "grey") +
  geom_errorbar(aes(ymin = bet_cil, ymax = bet_cih), size = 0.2, colour = "grey") +
  annotate(
    "text",
    x = 6, y = 25,
    label = paste("r=", round(cor(ms$complexity, ms$LongBet, use = "complete.obs"), 2))
  ) +
  xlab("Complexity Norms") +
  ylab("Bet to Long Word") +
  geom_vline(xintercept = q, col = "red") +
  ggtitle("High Frequency meanings (words)")
## Error: 'x' must be numeric

correlation between norms and length

# correlation between norms and bets to long word (all)
# Computed over the individual rows of d (no aggregation by word).
cor.test(d$LongBet,d$complexity)
## 
##  Pearson's product-moment correlation
## 
## data:  d$LongBet and d$complexity
## t = 3.348, df = 1998, p-value = 0.0008281
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03097 0.11814
## sample estimates:
##    cor 
## 0.0747
# bm.partial() is defined outside this section; from its arguments it presumably
# returns the LongBet-complexity correlation with log frequency partialled out
# -- TODO confirm.
bm.partial(d$LongBet,d$complexity, d$log.e.freq )
## [1] 0.069
#partial.r(d[,c(4,8,10)],c(1,2),3 )

# correlation between norms and bets to long word (aggregated across words)
# Uses the per-word means in ms rather than the individual rows of d.
cor.test(ms$LongBet,ms$complexity)
## 
##  Pearson's product-moment correlation
## 
## data:  ms$LongBet and ms$complexity
## t = 2.227, df = 94, p-value = 0.02832
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02449 0.40614
## sample estimates:
##    cor 
## 0.2239
# Same bm.partial() helper as above, applied to the per-word means in ms.
bm.partial(ms$LongBet,ms$complexity, ms$log.e.freq  )
## [1] 0.2283
# Mixed model on the rows of d: fixed effects of complexity and log frequency,
# random intercepts for trial and for worker.
summary(lmer(LongBet ~ complexity + log.e.freq + (1|trial) + (1|workerid), d))
## Linear mixed model fit by REML ['lmerMod']
## Formula: LongBet ~ complexity + log.e.freq + (1 | trial) + (1 | workerid)
##    Data: d
## 
## REML criterion at convergence: 17989
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2752 -0.7294  0.0043  0.7492  2.3246 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept)  15.19    3.90   
##  trial    (Intercept)   1.51    1.23   
##  Residual             637.67   25.25   
## Number of obs: 1931, groups: workerid, 200; trial, 10
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   56.215      2.872   19.58
## complexity     1.872      0.589    3.18
## log.e.freq    -3.858      0.625   -6.17
## 
## Correlation of Fixed Effects:
##            (Intr) cmplxt
## complexity -0.688       
## log.e.freq -0.709  0.047

plot by quintiles

# Aggregate by quintile: mean bet on the long word per complexity quintile,
# plus the ci.low / ci.high summaries of the same bets.
# This replaces the per-word `ms` built earlier.
ms <- aggregate(LongBet ~ quintile, data = d, FUN = mean)
ms[["bet_cil"]] <- aggregate(LongBet ~ quintile, data = d, FUN = ci.low)[["LongBet"]]
ms[["bet_cih"]] <- aggregate(LongBet ~ quintile, data = d, FUN = ci.high)[["LongBet"]]


# Mean bet on the long word per complexity-norm quintile, with a linear fit,
# error bars taken from bet_cil / bet_cih, and the quintile-level r as a label.
ggplot(data = ms, mapping = aes(x = quintile, y = LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    mapping = aes(ymin = bet_cil, ymax = bet_cih),
    size = 0.2,
    colour = "black"
  ) +
  annotate(
    "text",
    x = 5,
    y = 25,
    label = paste("r=", round(cor(ms$quintile, ms$LongBet, use = "complete"), 2))
  ) +
  scale_y_continuous(limits = c(20, 80)) +
  # scale_x_continuous(limits = c(0, 7), breaks = 1:7, labels = 1:7) +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 15)
  ) +
  xlab("Complexity Norm quintile") +
  ylab("Bet to Long Word") +
  ggtitle("High Frequency meanings (words)")

plot of chunk unnamed-chunk-20

correlations with quintiles

# correlation between norms quintiles and bets to long word
# Computed over the individual rows of d (no aggregation).
cor.test(d$quintile, d$LongBet)
## 
##  Pearson's product-moment correlation
## 
## data:  d$quintile and d$LongBet
## t = 3.939, df = 1998, p-value = 8.459e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04412 0.13111
## sample estimates:
##     cor 
## 0.08779
# correlation between norms quintiles and bets to long word, aggregated by
# quintile: ms was rebuilt above with one row per quintile, which is why the
# test has only 3 degrees of freedom.
cor.test(ms$quintile, ms$LongBet)
## 
##  Pearson's product-moment correlation
## 
## data:  ms$quintile and ms$LongBet
## t = 2.35, df = 3, p-value = 0.1003
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2667  0.9866
## sample estimates:
##   cor 
## 0.805

residual quintiles (controls for frequency)

get quintiles

###Plot

# Aggregate by residual quintile: mean bet on the long word per group, plus
# the ci.low / ci.high summaries of the same bets.
ms.qr <- aggregate(LongBet ~ resid.quintile, data = d, FUN = mean)
ms.qr[["bet_cil"]] <- aggregate(LongBet ~ resid.quintile, data = d, FUN = ci.low)[["LongBet"]]
ms.qr[["bet_cih"]] <- aggregate(LongBet ~ resid.quintile, data = d, FUN = ci.high)[["LongBet"]]

# Mean bet on the long word per residual complexity quintile, with a linear fit
# and error bars taken from bet_cil / bet_cih.
# The r label is computed on the rows of d, not on the aggregated ms.qr.
ggplot(data = ms.qr, mapping = aes(x = resid.quintile, y = LongBet)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    mapping = aes(ymin = bet_cil, ymax = bet_cih),
    size = 0.2,
    colour = "black"
  ) +
  annotate(
    "text",
    x = 5,
    y = 25,
    label = paste("r=", round(cor(d$resid.quintile, d$LongBet, use = "complete"), 2))
  ) +
  scale_y_continuous(limits = c(20, 80)) +
  # scale_x_continuous(limits = c(0, 7), breaks = 1:7, labels = 1:7) +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 15)
  ) +
  xlab("RESIDUAL Complexity Norm quintile") +
  ylab("Bet to Long Word") +
  ggtitle("High Frequency meanings (words)")
## Warning: NaNs produced

plot of chunk unnamed-chunk-23

residual quintiles correlations

# Residual quintile vs. bet on the long word, over the individual rows of d.
cor.test(d$resid.quintile, d$LongBet) ### statistically reliable, but a small correlation (see r in the output)
## 
##  Pearson's product-moment correlation
## 
## data:  d$resid.quintile and d$LongBet
## t = 5.218, df = 1998, p-value = 1.998e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.07248 0.15897
## sample estimates:
##    cor 
## 0.1159
# Same test on the aggregated ms.qr. As recorded below, this call fails with
# "not enough finite observations".
# NOTE(review): presumably ms.qr has too few rows with finite values -- check
# how resid.quintile is built.
cor.test(ms.qr$resid.quintile, ms.qr$LongBet) 
## Error: not enough finite observations


(3) Novel real objects

(A) Norms Complexity norming task RT task

Complexity Norms

Get reliability between two samples

  # Pearson correlation between the rating_1 and rating_2 columns of co_norms,
  # reported here as the reliability between the two samples.
  cor(co_norms$rating_1, co_norms$rating_2)
## [1] 0.8664

Complexity Norms

(B) Mapping task (adults) (Task)

read in data and format

make everything factors

merge in norms

get effect sizes

get obj conds

ratio plots

# Effect size against object ratio: one labelled point-range per object
# condition, a dashed zero line, a linear fit, and the correlation in red.
# NOTE(review): ymax is mapped to `cill` and ymin to `ciul`; the names suggest
# the bounds may be swapped, although the drawn range looks the same -- confirm.
ggplot(data = de, mapping = aes(x = objRatio, y = effect_size)) +
  geom_pointrange(mapping = aes(ymin = ciul, ymax = cill)) +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  geom_text(
    mapping = aes(x = objRatio + .03, y = effect_size, label = objCondition)
  ) +
  ylab("effect size") +
  xlab("object ratio") +
  theme(text = element_text(size = 20), plot.title = element_text(size = 20)) +
  ggtitle("Object ratio vs. effect size") +
  annotate(
    "text",
    x = .3,
    y = -.2,
    col = "red",
    label = paste("r=", round(cor(de$effect_size, de$objRatio, use = "complete"), 2))
  )

plot of chunk unnamed-chunk-33

# Effect size against complexity ratio (c.Mratio): one labelled point-range per
# object condition, a dashed zero line, a linear fit, and the correlation in red.
# The x-axis label now says "complexity ratio", matching the plotted variable
# and the title; it had been copied from the object-ratio plot.
# NOTE(review): position = "dodge" is what triggers the recorded message
# "ymax not defined: adjusting position using y instead"; the sibling plots do
# not use it -- confirm it is needed here.
ggplot(de, aes(y = effect_size, x = c.Mratio)) +
  geom_pointrange(aes(ymax = cill, ymin = ciul), position = "dodge") +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  geom_text(aes(c.Mratio + .02, effect_size, label = objCondition), position = "dodge") +
  ylab("effect size") +
  xlab("complexity ratio") +
  ggtitle("complexity ratio vs. effect size") +
  theme(text = element_text(size = 20), plot.title = element_text(size = 20)) +
  annotate(
    "text",
    x = .5, y = -.2, col = "red",
    label = paste("r=", round(cor(de$effect_size, de$c.Mratio, use = "complete"), 2))
  )
## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-33

# Effect size against RT ratio (rt.Mratio): one labelled point-range per object
# condition, a dashed zero line, a linear fit, and the correlation in red.
ggplot(data = de, mapping = aes(x = rt.Mratio, y = effect_size)) +
  geom_pointrange(mapping = aes(ymin = ciul, ymax = cill)) +
  geom_hline(yintercept = 0, lty = 2) +
  stat_smooth(method = "lm") +
  geom_text(
    mapping = aes(x = rt.Mratio + .0008, y = effect_size, label = objCondition)
  ) +
  ylab("effect size") +
  xlab("RT ratio") +
  ggtitle("RT ratio vs. effect size") +
  theme(text = element_text(size = 20), plot.title = element_text(size = 20)) +
  annotate(
    "text",
    x = .985,
    y = -.2,
    col = "red",
    label = paste("r=", round(cor(de$effect_size, de$rt.Mratio, use = "complete"), 2))
  )

plot of chunk unnamed-chunk-33

correlations between effect size at complexity conditions

# Pearson test: object ratio vs. effect size, over the rows of de.
cor.test(de$objRatio, de$effect_size)
## 
##  Pearson's product-moment correlation
## 
## data:  de$objRatio and de$effect_size
## t = -3.051, df = 13, p-value = 0.009273
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8703 -0.1999
## sample estimates:
##    cor 
## -0.646
# Pearson test: complexity ratio (c.Mratio) vs. effect size, over the rows of de.
cor.test(de$c.Mratio, de$effect_size)
## 
##  Pearson's product-moment correlation
## 
## data:  de$c.Mratio and de$effect_size
## t = -3.601, df = 13, p-value = 0.003228
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8949 -0.3046
## sample estimates:
##     cor 
## -0.7066
# Pearson test: RT ratio (rt.Mratio) vs. effect size, over the rows of de.
cor.test(de$rt.Mratio, de$effect_size)
## 
##  Pearson's product-moment correlation
## 
## data:  de$rt.Mratio and de$effect_size
## t = -3.722, df = 13, p-value = 0.002561
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8995 -0.3259
## sample estimates:
##     cor 
## -0.7182

(C) Mapping task (children) (Task)

(D) Production task (labels + descriptions)

(1) Labels (Task)

read in data and prep data frame

relationship between condition and description length

# Paired t-test on log label length: "complex" rows vs. "simple" rows of md.
# The condition values contain literal double quotes, hence '"complex"'.
# NOTE(review): pairing is by row position within each subset, so this relies
# on md being ordered so that the i-th complex row belongs with the i-th simple
# row -- TODO confirm.
t.test(md[md$condition == '"complex"',"log.length"],md[md$condition == '"simple"',"log.length"],paired = TRUE)
## 
##  Paired t-test
## 
## data:  md[md$condition == "\"complex\"", "log.length"] and md[md$condition == "\"simple\"", "log.length"]
## t = 3.735, df = 286, p-value = 0.0002269
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.03846 0.12417
## sample estimates:
## mean of the differences 
##                 0.08131
# Mixed model of log label length on condition, with by-worker random
# intercepts and by-worker random slopes for trial.
# NOTE(review): in the recorded output the trial slope variance is near zero
# and its correlation with the intercept is 1.00, which suggests the
# random-effects structure may be over-specified -- consider a simpler model.
summary(lmer(log.length~condition + (1+trial|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length ~ condition + (1 + trial | workerid)
##    Data: md
## 
## REML criterion at convergence: -0.4
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.009 -0.626  0.061  0.633  2.773 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  workerid (Intercept) 1.53e-02 0.12358      
##           trial       3.40e-06 0.00184  1.00
##  Residual             4.92e-02 0.22189      
## Number of obs: 574, groups: workerid, 59
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)         1.9350     0.0218    88.7
## condition"simple"  -0.0797     0.0185    -4.3
## 
## Correlation of Fixed Effects:
##             (Intr)
## cndtn"smpl" -0.427

relationship with complicated norms

# Look up each picture's complexity norm by matching md$picture against
# co_norms$ratingNum and taking the matching co_norms$value.
# Fail loudly if either lookup column is missing. Assigning NULL to md$c.norms
# silently creates no column, and the problem then only surfaces later as
# "object 'c.norms' not found".
if (is.null(co_norms$ratingNum) || is.null(co_norms$value)) {
  stop("co_norms must contain 'ratingNum' and 'value' columns", call. = FALSE)
}
index <- match(md$picture, co_norms$ratingNum)
md$c.norms <- co_norms$value[index]

# Per-picture mean log label length (keeping the picture's complexity norm),
# plus the ci.high / ci.low summaries of the same values.
ms <- aggregate(log.length ~ c.norms + picture, data = md, FUN = mean)
## Error: object 'c.norms' not found
ms[["cih"]] <- aggregate(log.length ~ c.norms + picture, data = md, FUN = ci.high)[["log.length"]]
## Error: object 'c.norms' not found
ms[["cil"]] <- aggregate(log.length ~ c.norms + picture, data = md, FUN = ci.low)[["log.length"]]
## Error: object 'c.norms' not found
# Plot: mean log label length per picture against its complexity norm, with a
# linear fit and error bars running from (mean - cil) to (mean + cih).
ggplot(data = ms, mapping = aes(x = c.norms, y = log.length)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    mapping = aes(ymin = log.length - cil, ymax = log.length + cih),
    size = 0.2,
    colour = "grey"
  ) +
  theme_bw() +
  xlab("Object Complexity Norms") +
  ylab("Log Word Length (characters)") +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 15)
  )
## Error: object 'c.norms' not found

relationship with RT norms

# Attach each picture's log-RT norm, matched on rto_norms$Answer.train_image.
index <- match(x = md$picture, table = rto_norms$Answer.train_image)
md[["rt.norms"]] <- rto_norms$log.rt[index]

# Per-picture mean log label length (keeping the picture's RT norm), plus the
# ci.high / ci.low summaries of the same values.
ms <- aggregate(log.length ~ rt.norms + picture, data = md, FUN = mean)
ms[["cih"]] <- aggregate(log.length ~ rt.norms + picture, data = md, FUN = ci.high)[["log.length"]]
ms[["cil"]] <- aggregate(log.length ~ rt.norms + picture, data = md, FUN = ci.low)[["log.length"]]

# Plot: mean log label length per picture against its RT norm, with a linear
# fit and error bars running from (mean - cil) to (mean + cih).
# The x-axis label now says "Object RT Norms" because x is rt.norms; it had
# been copied from the complexity-norm plot.
ggplot(ms, aes(rt.norms, log.length)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(aes(ymax = log.length + cih, ymin = log.length - cil), size = 0.2, colour = "grey") +
  theme_bw() +
  xlab("Object RT Norms") +
  ylab("Log Word Length (characters)") +
  theme(axis.title = element_text(size = 20), axis.text = element_text(size = 15))

plot of chunk unnamed-chunk-38

(2) Descriptions (Task)

read in data and prep data frame

relationship between condition and description length

# Earlier versions on the untransformed length (length_c), kept for reference:
#summary(lmer(length_c~condition + (1|workerid), md))
#summary(lmer(length_c~condition + trial + (1+trial|workerid), md))

# Mixed model of log description length on condition, with by-worker random
# intercepts.
# NOTE(review): the formula uses `md$condition` although `data = md` is given;
# plain `condition` (as in the next model) would give the same fit with cleaner
# coefficient labels. Left unchanged so the recorded output still matches.
summary(lmer(log.length_c~md$condition + (1|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ md$condition + (1 | workerid)
##    Data: md
## 
## REML criterion at convergence: 863.1
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.004 -0.616  0.071  0.599  4.269 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 0.476    0.690   
##  Residual             0.175    0.418   
## Number of obs: 600, groups: workerid, 60
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)            3.3545     0.0922    36.4
## md$condition"simple"  -0.1128     0.0342    -3.3
## 
## Correlation of Fixed Effects:
##             (Intr)
## md$cndtn"s" -0.185
# Same outcome with trial added as a fixed effect, plus by-worker random
# intercepts and by-worker random slopes for trial.
summary(lmer(log.length_c~condition + trial + (1+trial|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ condition + trial + (1 + trial | workerid)
##    Data: md
## 
## REML criterion at convergence: 823.1
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.185 -0.559  0.071  0.594  3.841 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  workerid (Intercept) 0.43168  0.6570        
##           trial       0.00177  0.0421   -0.02
##  Residual             0.14949  0.3866        
## Number of obs: 600, groups: workerid, 60
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)        3.54229    0.09310    38.0
## condition"simple" -0.12772    0.03260    -3.9
## trial             -0.03278    0.00773    -4.2
## 
## Correlation of Fixed Effects:
##             (Intr) cndt""
## cndtn"smpl" -0.189       
## trial       -0.252  0.031
## plot
# Overlaid, semi-transparent densities of log description length by condition.
ggplot(data = md, mapping = aes(x = log.length_c, fill = condition)) +
  geom_density(alpha = 0.2)

plot of chunk unnamed-chunk-40

correlations with complexity norms

# Look up each picture's complexity norm by matching md$picture against
# co_norms$ratingNum and taking the matching co_norms$value.
# Fail loudly if either lookup column is missing. Assigning NULL to md$c.norms
# silently creates no column, and the problem then only surfaces later as
# "object 'c.norms' not found".
if (is.null(co_norms$ratingNum) || is.null(co_norms$value)) {
  stop("co_norms must contain 'ratingNum' and 'value' columns", call. = FALSE)
}
index <- match(md$picture, co_norms$ratingNum)
md$c.norms <- co_norms$value[index]

# Mixed models of log description length on the complexity norm.
# As recorded below, both calls fail because md has no c.norms column at this
# point.
# Model 1: by-worker random intercepts and random slopes for trial.
summary(lmer(log.length_c~c.norms + (1+trial|workerid), md))
## Error: error in evaluating the argument 'object' in selecting a method for function 'summary': Error in eval(expr, envir, enclos) : object 'c.norms' not found
## Calls: lmer ... eval -> model.frame -> model.frame.default -> eval -> eval
# Model 2: trial as a fixed effect, by-worker random intercepts only.
summary(lmer(log.length_c~c.norms + trial + (1|workerid), md))
## Error: error in evaluating the argument 'object' in selecting a method for function 'summary': Error in eval(expr, envir, enclos) : object 'c.norms' not found
## Calls: lmer ... eval -> model.frame -> model.frame.default -> eval -> eval
# Do complexity norms predict description length? (raw correlation over rows of md)

cor.test(md$log.length_c,md$c.norms)
## Error: 'x' and 'y' must have the same length

complexity norms plot

# Per-picture mean log description length (keeping the picture's complexity
# norm), the ci.high / ci.low summaries of the same values, and n.unique()
# applied to the worker ids of each picture.
ms <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = mean)
## Error: object 'c.norms' not found
ms[["cih"]] <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = ci.high)[["log.length_c"]]
## Error: object 'c.norms' not found
ms[["cil"]] <- aggregate(log.length_c ~ c.norms + picture, data = md, FUN = ci.low)[["log.length_c"]]
## Error: object 'c.norms' not found
ms[["n"]] <- aggregate(workerid ~ c.norms + picture, data = md, FUN = n.unique)[["workerid"]]
## Error: object 'c.norms' not found
# Mean log description length per picture against its complexity norm, with a
# linear fit and error bars running from (mean - cil) to (mean + cih).
ggplot(data = ms, mapping = aes(x = c.norms, y = log.length_c)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    mapping = aes(ymin = log.length_c - cil, ymax = log.length_c + cih),
    size = 0.2,
    colour = "grey"
  ) +
  theme_bw() +
  xlab("Object Complexity Norms") +
  ylab("Log Description Length (characters)") +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 15)
  )
## Error: object 'c.norms' not found

correlations with RT norms

# Attach each picture's log-RT norm, matched on rto_norms$Answer.train_image.
index <- match(x = md$picture, table = rto_norms$Answer.train_image)
md[["rt.norms"]] <- rto_norms$log.rt[index]

# Mixed model of log description length on the RT norm, with by-worker random
# intercepts and by-worker random slopes for trial.
summary(lmer(log.length_c~rt.norms + (1+trial|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ rt.norms + (1 + trial | workerid)
##    Data: md
## 
## REML criterion at convergence: 830.1
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.357 -0.566  0.076  0.560  3.756 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  workerid (Intercept) 0.43769  0.6616        
##           trial       0.00278  0.0527   -0.08
##  Residual             0.15004  0.3873        
## Number of obs: 600, groups: workerid, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   -0.506      1.089   -0.46
## rt.norms       0.523      0.146    3.58
## 
## Correlation of Fixed Effects:
##          (Intr)
## rt.norms -0.997
# Same outcome with trial as a fixed effect and by-worker random intercepts only.
summary(lmer(log.length_c~rt.norms + trial + (1|workerid), md))
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.length_c ~ rt.norms + trial + (1 | workerid)
##    Data: md
## 
## REML criterion at convergence: 840.5
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.165 -0.601  0.069  0.618  3.872 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 0.481    0.694   
##  Residual             0.166    0.408   
## Number of obs: 600, groups: workerid, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  -0.0332     1.1091   -0.03
## rt.norms      0.4714     0.1485    3.17
## trial        -0.0322     0.0058   -5.56
## 
## Correlation of Fixed Effects:
##          (Intr) rt.nrm
## rt.norms -0.996       
## trial    -0.008 -0.020
# Do RT norms predict description length? Raw Pearson correlation over the rows
# of md, with no random effects. In the recorded output this correlation is
# near zero, unlike the rt.norms estimates from the mixed models above.

cor.test(md$log.length_c,md$rt.norms)
## 
##  Pearson's product-moment correlation
## 
## data:  md$log.length_c and md$rt.norms
## t = 0.2782, df = 598, p-value = 0.781
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.06873  0.09134
## sample estimates:
##     cor 
## 0.01138

rt norms plot

# Per-picture mean log description length (keeping the picture's RT norm),
# plus the ci.low / ci.high summaries of the same values.
ms <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = mean)
ms[["cil"]] <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = ci.low)[["log.length_c"]]
ms[["cih"]] <- aggregate(log.length_c ~ rt.norms + picture, data = md, FUN = ci.high)[["log.length_c"]]


# Mean log description length per picture against its RT norm, with a linear
# fit and error bars running from (mean - cil) to (mean + cih).
ggplot(data = ms, mapping = aes(x = rt.norms, y = log.length_c)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", formula = y ~ x) +
  geom_errorbar(
    mapping = aes(ymin = log.length_c - cil, ymax = log.length_c + cih),
    size = 0.2,
    colour = "grey"
  ) +
  theme_bw() +
  xlab("Object RT Norms") +
  ylab("Log Description Length (characters)") +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 15)
  )

plot of chunk unnamed-chunk-44

# reliable when controlling for random effects


(4) Geons

(A) Norms (Complexity Task) (RT task)

(1) Complexity Norms

(2) RT Norms

(B) Mapping task (Task)