library(ggplot2)
library(directlabels)
library(car)
library(lme4)
library(langcog)
library(dplyr)
library(knitr)
# library(plyr)

rm(list=ls())
theme_set(theme_mikabr() +
            theme(panel.grid = element_blank(),
                  strip.background = element_blank()))
font <- "Open Sans"

1 Read in data

data = read.csv("data/study1/Combined 2011 2012 coded data 11-25-13.csv")

Data cleaning.

data$rdif=round(data$difficulty)
data$size.01 <- ifelse(data$size > 2, 1, ifelse(data$size < 3, 0, NA))
data$g.present = ifelse(data$g.present==9, NA, data$g.present)
data = data[data$subnum!=2455,]

2 Descriptives

Proportion of trials containing gesture is 0.952529.

Now look at the the trials with no gesture.

length(data[data$g.present==0 & !is.na(data$g.present),]$g.present) #168 trials
## [1] 168
nog <- data[data$g.present==0 & !is.na(data$g.present),]
hist(nog$level)

sort(nog$level)
##   [1]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [24]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [47]  1  1  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  2  2  2
##  [70]  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2
##  [93]  2  2  2  2  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
## [116]  3  3  3  3  3  3  3  3  3  3  3  3  3  4  4  4  4  4  4  4  4  4  5
## [139]  5  5  5  5  5  5  5  5  5  6  6  6  6  6  7  8  9  9  9  9 10 11 11
## [162] 12 12 12 13 13 13 13

129 (77%) were on trials of level 3 or lower, meaning that there were only 2 addends in the problem.

3 Statistical models

consider the following variables: level, difficulty, # of moves,trial

hist(data$level)

hist(data$difficulty)

hist(data$moves)

hist(data$trial)

3.1 Basic model

sizefull <- lmer(size ~ difficulty + moves + trial + log(level) + 
                   (difficulty + moves + trial + log(level)| subnum), 
                 data=data) 
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 2 negative
## eigenvalues

And show coefficients in a nice table.

kable(summary(sizefull)$coefficients, digits = 3)
Estimate Std. Error t value
(Intercept) 1.958 0.122 16.055
difficulty 0.036 0.019 1.939
moves -0.004 0.004 -1.179
trial 0.001 0.002 0.812
log(level) 0.186 0.058 3.233

and p-vals by t=z

2*(1-pnorm(abs((fixef(sizefull)/sqrt(diag(vcov(sizefull)))))))
## (Intercept)  difficulty       moves       trial  log(level) 
## 0.000000000 0.052471109 0.238252612 0.416566441 0.001225578

3.2 Control for coder

Do we still see effects of difficulty and level? do these interact with coder? Yes and yes.

sizefull.coder <- lmer(size ~ difficulty + log(level) + 
                         coder * difficulty + coder * level + 
                   (difficulty + log(level) | subnum), 
                 data=data) 
kable(summary(sizefull.coder)$coefficients, digits = 3)
Estimate Std. Error t value
(Intercept) 2.630 0.246 10.713
difficulty 0.091 0.039 2.373
log(level) 0.288 0.071 4.081
coderCK -1.086 0.205 -5.308
coderES -1.093 0.225 -4.853
coderHF -0.598 0.232 -2.574
coderLT -0.537 0.137 -3.907
coderMH -0.500 0.309 -1.615
coderRESOLVED -0.315 0.195 -1.618
level -0.101 0.037 -2.748
difficulty:coderCK -0.094 0.033 -2.819
difficulty:coderES -0.119 0.038 -3.150
difficulty:coderHF -0.042 0.041 -1.041
difficulty:coderLT -0.041 0.023 -1.808
difficulty:coderMH 0.032 0.054 0.590
difficulty:coderRESOLVED -0.023 0.029 -0.784
coderCK:level 0.114 0.032 3.600
coderES:level 0.110 0.035 3.145
coderHF:level 0.089 0.035 2.523
coderLT:level 0.071 0.023 3.120
coderMH:level 0.043 0.048 0.882
coderRESOLVED:level 0.031 0.030 1.036

Another approach: analyze coder as a random effect - this is maybe more correct?

sizefull.coder <- lmer(size ~ difficulty + moves + trial + log(level) + 
                         (difficulty + moves + trial + log(level) | coder) +  
                   (difficulty + moves + trial + log(level) | subnum), 
                 data=data) 
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 10 negative
## eigenvalues
kable(summary(sizefull.coder)$coefficients, digits = 3)
Estimate Std. Error t value
(Intercept) 2.048 0.228 8.993
difficulty 0.052 0.167 0.309
moves -0.007 0.089 -0.076
trial 0.000 0.189 0.000
log(level) 0.233 0.175 1.328

3.3 Binomial analyses

size.allfour.binom <- glmer(size.01 ~ difficulty + moves + trial + log(level) + 
                              (difficulty + moves + trial + log(level)| subnum), 
                            data = data, family="binomial")
## Warning in optwrap(optimizer, devfun, start, rho$lower, control =
## control, : convergence code 1 from bobyqa: bobyqa -- maximum number of
## function evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

And show coefficients.

kable(summary(size.allfour.binom)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.819 1.047 -2.693 0.007
difficulty 0.329 0.156 2.106 0.035
moves -0.027 0.030 -0.880 0.379
trial -0.010 0.015 -0.683 0.495
log(level) 0.757 0.637 1.189 0.235

and p-vals by t=z

2*(1-pnorm(abs((fixef(size.allfour.binom)/sqrt(diag(vcov(size.allfour.binom)))))))
## (Intercept)  difficulty       moves       trial  log(level) 
## 0.007081421 0.035196425 0.379124356 0.494586625 0.234561425

4 Plot

Size by level and difficulty graph

get median difficulty level by problem level

ms <- aggregate(difficulty ~ level + subnum, data=data, mean)
meds <- aggregate(difficulty ~ level, data=ms, median)
names(meds) <- c("level","med")
data2 <- merge(data, meds, by="level", all.x=T)
data <- data2
data$Difficulty_Category <- ifelse(data$difficulty < data$med, 
                                   "Below Median Difficulty (Easier)", 
                                   "At or Above Median Difficulty (Harder)")

More aggregation.

data$Subjective_Difficulty <- data$Difficulty_Category
ms1 <- aggregate(size ~ level + Subjective_Difficulty + subnum, data=data, mean)
ms2 <- aggregate(size ~ level + Subjective_Difficulty, data=ms1, mean)
data$Subjective_Difficulty <- factor(data$Subjective_Difficulty)
ms2$measure.type <- "Gesture Size"
ms2$measure <- ms2$size
ms <- bind_rows(ms2)

And plot.

qplot(level,measure, colour=Subjective_Difficulty,
      label=c(Subjective_Difficulty),
      data=ms, 
      ylab=c("Average Gesture Size"), 
      xlab=c("Problem Level (Number of Addends - 1)"), ylim=c(1,4)) +
  geom_smooth() +
  theme_bw(base_size=14) +
  scale_color_brewer(palette="Set1")+
  theme(legend.justification = c(0,0), legend.position=c(.6,.7))

And by coder.

ms1 <- aggregate(size ~ level + Subjective_Difficulty + subnum + coder, data=data, mean)
ms2 <- aggregate(size ~ level + Subjective_Difficulty + coder, data=ms1, mean)
data$Subjective_Difficulty <- factor(data$Subjective_Difficulty)
ms2$measure.type <- "Gesture Size"
ms2$measure <- ms2$size
ms <- bind_rows(ms2)

qplot(level,measure, colour=Subjective_Difficulty,
      label=c(Subjective_Difficulty),
      data=ms, 
      ylab=c("Average Gesture Size"), 
      xlab=c("Problem Level (Number of Addends - 1)"), ylim=c(1,4)) +
  geom_smooth() +
  facet_wrap(~coder) + 
  theme_bw(base_size=14) +
  scale_color_brewer(palette="Set1")+
  theme(legend.justification = c(0,0), legend.position="bottom")

5 Skips analysis

all.cor <- data[data$correct==1,]
all.cor$rdif <- round(all.cor$difficulty)
all.cor$mean <- (all.cor$movesmin + all.cor$movesmax)/2
all.cor$prop.moves2 <- all.cor$mean / all.cor$moves

Histograms.

qplot(prop.moves2, data=all.cor)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1212 rows containing non-finite values (stat_bin).

Square root transform to increase normality.

qplot(sqrt(prop.moves2), data=all.cor)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1212 rows containing non-finite values (stat_bin).

5.1 Model

Prop moves by difficulty model.

prop6 <- lmer(sqrt(prop.moves2) ~ difficulty + moves + trial+level+ 
                (1+difficulty + moves + trial+level|subnum), data = all.cor) # prop.moves by difficulty
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

And show coefficients.

kable(summary(prop6)$coefficients, digits = 3)
Estimate Std. Error t value
(Intercept) 0.956 0.037 25.999
difficulty 0.014 0.006 2.319
moves -0.010 0.002 -6.633
trial 0.001 0.000 2.196
level 0.002 0.008 0.195

and p-vals by t=z

round(2*(1-pnorm(abs((fixef(prop6)/sqrt(diag(vcov(prop6))))))), digits=2)
## (Intercept)  difficulty       moves       trial       level 
##        0.00        0.02        0.00        0.03        0.85

5.2 No square-root transform

prop6.nosqrt <- lmer(prop.moves2 ~ difficulty + moves + trial+level+ 
                (1+difficulty + moves + trial+level|subnum), data = all.cor) 
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 1.72672 (tol =
## 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
kable(summary(prop6.nosqrt)$coefficients, digits = 3)
Estimate Std. Error t value
(Intercept) 0.935 0.057 16.482
difficulty 0.018 0.009 1.915
moves -0.014 0.003 -5.411
trial 0.002 0.001 2.082
level -0.004 0.013 -0.348
round(2*(1-pnorm(abs((fixef(prop6.nosqrt)/sqrt(diag(vcov(prop6.nosqrt))))))), digits =2)
## (Intercept)  difficulty       moves       trial       level 
##        0.00        0.06        0.00        0.04        0.73

5.3 Control for coder

Another approach: analyze coder as a random effect - this is maybe more correct?

Doesn’t converge with full predictor set (likely due to sparsity of trials by coder).

prop6.coder <- lmer(prop.moves2 ~ difficulty + moves + trial +  
                      (1 | coder) + 
                      (difficulty + moves + trial | subnum), 
                    data = all.cor) 
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
kable(summary(prop6.coder)$coefficients, digits = 3)
Estimate Std. Error t value
(Intercept) 0.935 0.047 19.881
difficulty 0.018 0.008 2.198
moves -0.015 0.002 -9.803
trial 0.002 0.001 2.034
round(2*(1-pnorm(abs((fixef(prop6.coder)/sqrt(diag(vcov(prop6.coder))))))), digits= 2)
## (Intercept)  difficulty       moves       trial 
##        0.00        0.03        0.00        0.04

5.4 Plot

at each level, split problems into harder and easier

ms <- aggregate(difficulty ~ moves + subnum, data=data, mean)
ms1 <- aggregate(difficulty ~ moves, data=ms, median)
names(ms1) <- c("moves","mdif")
all.cor2 <- merge(all.cor, ms1, by="moves", all.x=T)
all.cor <- all.cor2
all.cor$levdif <- ifelse(all.cor$difficulty < all.cor$mdif, "Below Median (Easier)", "At or Above Median (Harder)")

prop moves by # of moves and high/low dif

all.cor$Difficulty_Category <- all.cor$levdif
all.cor$Subjective_Difficulty <- all.cor$Difficulty_Category
ms <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty + subnum, data=all.cor, mean)
ms1 <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty, data=ms, mean)
ms1$measure.type <- "Proportion of Moves"
ms1$measure <- ms1$prop.moves2
ms <- bind_rows(ms1)
qplot(moves,measure, colour=Subjective_Difficulty,label=c("Subjective Difficulty"), data=ms, ylab=c("Proportion of Steps Gestured"), xlab=c("Number of Steps Required"), ylim=c(0,1.5)) +
  #   facet_grid(measure.type ~ .) + 
  geom_smooth() +
  theme_bw(base_size=14) +
  scale_color_brewer(palette="Set1")+
  theme(legend.justification = c(0,0), legend.position=c(.6,.7))

By coder.

ms <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty + subnum + coder, data=all.cor, mean)
ms1 <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty + coder, data=ms, mean)
ms1$measure.type <- "Proportion of Moves"
ms1$measure <- ms1$prop.moves2
ms <- bind_rows(ms1)

qplot(moves,measure, colour=Subjective_Difficulty,label=c("Subjective Difficulty"), data=ms, ylab=c("Proportion of Steps Gestured"), xlab=c("Number of Steps Required"), ylim=c(0,1.5)) +
  facet_grid(coder ~ .) +
  geom_smooth() +
  theme_bw(base_size=14) +
  scale_color_brewer(palette="Set1")+
  theme(legend.justification = c(0,0), legend.position=c(.6,.7))
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 28.925
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 10.075
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 36.906
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.
## fewer data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 28.925
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 10.075
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 36.906
## Warning: Removed 2 rows containing missing values (geom_point).