library(ggplot2)
library(directlabels)
library(car)
library(lme4)
library(langcog)
library(dplyr)
library(knitr)
# library(plyr)

rm(list=ls())
theme_set(theme_mikabr() +
            theme(panel.grid = element_blank(),
                  strip.background = element_blank()))
font <- "Open Sans"

1 Read in data

data = read.csv("data/study1/Combined 2011 2012 coded data 11-25-13.csv")

Data cleaning.

round difficulty ratings so they make up distinct groups
create a binomial measure of gesture size
create a binomial measures of gesture present
exclude participant 2455 - only 5 years old

data$rdif=round(data$difficulty)
data$size.01 <- ifelse(data$size > 2, 1, ifelse(data$size < 3, 0, NA))
data$g.present = ifelse(data$g.present==9, NA, data$g.present)
data = data[data$subnum!=2455,]

2 Descriptives

Proportion of trials containing gesture is 0.952529.

Now look at the the trials with no gesture.

length(data[data$g.present==0 & !is.na(data$g.present),]$g.present) #168 trials

## [1] 168

nog <- data[data$g.present==0 & !is.na(data$g.present),]
hist(nog$level)

sort(nog$level)

##   [1]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [24]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [47]  1  1  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  2  2  2
##  [70]  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2
##  [93]  2  2  2  2  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
## [116]  3  3  3  3  3  3  3  3  3  3  3  3  3  4  4  4  4  4  4  4  4  4  5
## [139]  5  5  5  5  5  5  5  5  5  6  6  6  6  6  7  8  9  9  9  9 10 11 11
## [162] 12 12 12 13 13 13 13

129 (77%) were on trials of level 3 or lower, meaning that there were only 2 addends in the problem.

3 Statistical models

consider the following variables: level, difficulty, # of moves,trial

hist(data$level)

hist(data$difficulty)

hist(data$moves)

hist(data$trial)

3.1 Basic model

sizefull <- lmer(size ~ difficulty + moves + trial + log(level) + 
                   (difficulty + moves + trial + log(level)| subnum), 
                 data=data)

## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 2 negative
## eigenvalues

And show coefficients in a nice table.

kable(summary(sizefull)$coefficients, digits = 3)

	Estimate	Std. Error	t value
(Intercept)	1.958	0.122	16.055
difficulty	0.036	0.019	1.939
moves	-0.004	0.004	-1.179
trial	0.001	0.002	0.812
log(level)	0.186	0.058	3.233

and p-vals by t=z

2*(1-pnorm(abs((fixef(sizefull)/sqrt(diag(vcov(sizefull)))))))

## (Intercept)  difficulty       moves       trial  log(level) 
## 0.000000000 0.052471109 0.238252612 0.416566441 0.001225578

3.2 Control for coder

Do we still see effects of difficulty and level? do these interact with coder? Yes and yes.

sizefull.coder <- lmer(size ~ difficulty + log(level) + 
                         coder * difficulty + coder * level + 
                   (difficulty + log(level) | subnum), 
                 data=data) 
kable(summary(sizefull.coder)$coefficients, digits = 3)

	Estimate	Std. Error	t value
(Intercept)	2.630	0.246	10.713
difficulty	0.091	0.039	2.373
log(level)	0.288	0.071	4.081
coderCK	-1.086	0.205	-5.308
coderES	-1.093	0.225	-4.853
coderHF	-0.598	0.232	-2.574
coderLT	-0.537	0.137	-3.907
coderMH	-0.500	0.309	-1.615
coderRESOLVED	-0.315	0.195	-1.618
level	-0.101	0.037	-2.748
difficulty:coderCK	-0.094	0.033	-2.819
difficulty:coderES	-0.119	0.038	-3.150
difficulty:coderHF	-0.042	0.041	-1.041
difficulty:coderLT	-0.041	0.023	-1.808
difficulty:coderMH	0.032	0.054	0.590
difficulty:coderRESOLVED	-0.023	0.029	-0.784
coderCK:level	0.114	0.032	3.600
coderES:level	0.110	0.035	3.145
coderHF:level	0.089	0.035	2.523
coderLT:level	0.071	0.023	3.120
coderMH:level	0.043	0.048	0.882
coderRESOLVED:level	0.031	0.030	1.036

Another approach: analyze coder as a random effect - this is maybe more correct?

sizefull.coder <- lmer(size ~ difficulty + moves + trial + log(level) + 
                         (difficulty + moves + trial + log(level) | coder) +  
                   (difficulty + moves + trial + log(level) | subnum), 
                 data=data)

## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 10 negative
## eigenvalues

kable(summary(sizefull.coder)$coefficients, digits = 3)

	Estimate	Std. Error	t value
(Intercept)	2.048	0.228	8.993
difficulty	0.052	0.167	0.309
moves	-0.007	0.089	-0.076
trial	0.000	0.189	0.000
log(level)	0.233	0.175	1.328

3.3 Binomial analyses

size.allfour.binom <- glmer(size.01 ~ difficulty + moves + trial + log(level) + 
                              (difficulty + moves + trial + log(level)| subnum), 
                            data = data, family="binomial")

## Warning in optwrap(optimizer, devfun, start, rho$lower, control =
## control, : convergence code 1 from bobyqa: bobyqa -- maximum number of
## function evaluations exceeded

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

And show coefficients.

kable(summary(size.allfour.binom)$coefficients, digits = 3)

	Estimate	Std. Error	z value	Pr(>\|z\|)
(Intercept)	-2.819	1.047	-2.693	0.007
difficulty	0.329	0.156	2.106	0.035
moves	-0.027	0.030	-0.880	0.379
trial	-0.010	0.015	-0.683	0.495
log(level)	0.757	0.637	1.189	0.235

and p-vals by t=z

2*(1-pnorm(abs((fixef(size.allfour.binom)/sqrt(diag(vcov(size.allfour.binom)))))))

## (Intercept)  difficulty       moves       trial  log(level) 
## 0.007081421 0.035196425 0.379124356 0.494586625 0.234561425

4 Plot

Size by level and difficulty graph

get median difficulty level by problem level

ms <- aggregate(difficulty ~ level + subnum, data=data, mean)
meds <- aggregate(difficulty ~ level, data=ms, median)
names(meds) <- c("level","med")
data2 <- merge(data, meds, by="level", all.x=T)
data <- data2
data$Difficulty_Category <- ifelse(data$difficulty < data$med, 
                                   "Below Median Difficulty (Easier)", 
                                   "At or Above Median Difficulty (Harder)")

More aggregation.

data$Subjective_Difficulty <- data$Difficulty_Category
ms1 <- aggregate(size ~ level + Subjective_Difficulty + subnum, data=data, mean)
ms2 <- aggregate(size ~ level + Subjective_Difficulty, data=ms1, mean)
data$Subjective_Difficulty <- factor(data$Subjective_Difficulty)
ms2$measure.type <- "Gesture Size"
ms2$measure <- ms2$size
ms <- bind_rows(ms2)

And plot.

qplot(level,measure, colour=Subjective_Difficulty,
      label=c(Subjective_Difficulty),
      data=ms, 
      ylab=c("Average Gesture Size"), 
      xlab=c("Problem Level (Number of Addends - 1)"), ylim=c(1,4)) +
  geom_smooth() +
  theme_bw(base_size=14) +
  scale_color_brewer(palette="Set1")+
  theme(legend.justification = c(0,0), legend.position=c(.6,.7))

And by coder.

ms1 <- aggregate(size ~ level + Subjective_Difficulty + subnum + coder, data=data, mean)
ms2 <- aggregate(size ~ level + Subjective_Difficulty + coder, data=ms1, mean)
data$Subjective_Difficulty <- factor(data$Subjective_Difficulty)
ms2$measure.type <- "Gesture Size"
ms2$measure <- ms2$size
ms <- bind_rows(ms2)

qplot(level,measure, colour=Subjective_Difficulty,
      label=c(Subjective_Difficulty),
      data=ms, 
      ylab=c("Average Gesture Size"), 
      xlab=c("Problem Level (Number of Addends - 1)"), ylim=c(1,4)) +
  geom_smooth() +
  facet_wrap(~coder) + 
  theme_bw(base_size=14) +
  scale_color_brewer(palette="Set1")+
  theme(legend.justification = c(0,0), legend.position="bottom")

5 Skips analysis

all.cor <- data[data$correct==1,]
all.cor$rdif <- round(all.cor$difficulty)
all.cor$mean <- (all.cor$movesmin + all.cor$movesmax)/2
all.cor$prop.moves2 <- all.cor$mean / all.cor$moves

Histograms.

qplot(prop.moves2, data=all.cor)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 1212 rows containing non-finite values (stat_bin).

Square root transform to increase normality.

qplot(sqrt(prop.moves2), data=all.cor)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 1212 rows containing non-finite values (stat_bin).

5.1 Model

Prop moves by difficulty model.

prop6 <- lmer(sqrt(prop.moves2) ~ difficulty + moves + trial+level+ 
                (1+difficulty + moves + trial+level|subnum), data = all.cor) # prop.moves by difficulty

## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues

And show coefficients.

kable(summary(prop6)$coefficients, digits = 3)

	Estimate	Std. Error	t value
(Intercept)	0.956	0.037	25.999
difficulty	0.014	0.006	2.319
moves	-0.010	0.002	-6.633
trial	0.001	0.000	2.196
level	0.002	0.008	0.195

and p-vals by t=z

round(2*(1-pnorm(abs((fixef(prop6)/sqrt(diag(vcov(prop6))))))), digits=2)

## (Intercept)  difficulty       moves       trial       level 
##        0.00        0.02        0.00        0.03        0.85

5.2 No square-root transform

prop6.nosqrt <- lmer(prop.moves2 ~ difficulty + moves + trial+level+ 
                (1+difficulty + moves + trial+level|subnum), data = all.cor)

## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 1.72672 (tol =
## 0.002, component 1)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?

kable(summary(prop6.nosqrt)$coefficients, digits = 3)

	Estimate	Std. Error	t value
(Intercept)	0.935	0.057	16.482
difficulty	0.018	0.009	1.915
moves	-0.014	0.003	-5.411
trial	0.002	0.001	2.082
level	-0.004	0.013	-0.348

round(2*(1-pnorm(abs((fixef(prop6.nosqrt)/sqrt(diag(vcov(prop6.nosqrt))))))), digits =2)

## (Intercept)  difficulty       moves       trial       level 
##        0.00        0.06        0.00        0.04        0.73

5.3 Control for coder

Another approach: analyze coder as a random effect - this is maybe more correct?

Doesn’t converge with full predictor set (likely due to sparsity of trials by coder).

prop6.coder <- lmer(prop.moves2 ~ difficulty + moves + trial +  
                      (1 | coder) + 
                      (difficulty + moves + trial | subnum), 
                    data = all.cor)

## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?

kable(summary(prop6.coder)$coefficients, digits = 3)

	Estimate	Std. Error	t value
(Intercept)	0.935	0.047	19.881
difficulty	0.018	0.008	2.198
moves	-0.015	0.002	-9.803
trial	0.002	0.001	2.034

round(2*(1-pnorm(abs((fixef(prop6.coder)/sqrt(diag(vcov(prop6.coder))))))), digits= 2)

## (Intercept)  difficulty       moves       trial 
##        0.00        0.03        0.00        0.04

5.4 Plot

at each level, split problems into harder and easier

ms <- aggregate(difficulty ~ moves + subnum, data=data, mean)
ms1 <- aggregate(difficulty ~ moves, data=ms, median)
names(ms1) <- c("moves","mdif")
all.cor2 <- merge(all.cor, ms1, by="moves", all.x=T)
all.cor <- all.cor2
all.cor$levdif <- ifelse(all.cor$difficulty < all.cor$mdif, "Below Median (Easier)", "At or Above Median (Harder)")

prop moves by # of moves and high/low dif

all.cor$Difficulty_Category <- all.cor$levdif
all.cor$Subjective_Difficulty <- all.cor$Difficulty_Category
ms <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty + subnum, data=all.cor, mean)
ms1 <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty, data=ms, mean)
ms1$measure.type <- "Proportion of Moves"
ms1$measure <- ms1$prop.moves2
ms <- bind_rows(ms1)

qplot(moves,measure, colour=Subjective_Difficulty,label=c("Subjective Difficulty"), data=ms, ylab=c("Proportion of Steps Gestured"), xlab=c("Number of Steps Required"), ylim=c(0,1.5)) +
  #   facet_grid(measure.type ~ .) + 
  geom_smooth() +
  theme_bw(base_size=14) +
  scale_color_brewer(palette="Set1")+
  theme(legend.justification = c(0,0), legend.position=c(.6,.7))

By coder.

ms <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty + subnum + coder, data=all.cor, mean)
ms1 <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty + coder, data=ms, mean)
ms1$measure.type <- "Proportion of Moves"
ms1$measure <- ms1$prop.moves2
ms <- bind_rows(ms1)

qplot(moves,measure, colour=Subjective_Difficulty,label=c("Subjective Difficulty"), data=ms, ylab=c("Proportion of Steps Gestured"), xlab=c("Number of Steps Required"), ylim=c(0,1.5)) +
  facet_grid(coder ~ .) +
  geom_smooth() +
  theme_bw(base_size=14) +
  scale_color_brewer(palette="Set1")+
  theme(legend.justification = c(0,0), legend.position=c(.6,.7))

## Warning: Removed 2 rows containing non-finite values (stat_smooth).

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 28.925

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 10.075

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 36.906

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.
## fewer data values than degrees of freedom.

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 28.925

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 10.075

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 36.906

## Warning: Removed 2 rows containing missing values (geom_point).

Abacus gesture study 1

Brooks et al.

August 31, 2016

1 Read in data

2 Descriptives

3 Statistical models

3.1 Basic model

3.2 Control for coder

3.3 Binomial analyses

4 Plot

5 Skips analysis

5.1 Model

5.2 No square-root transform

5.3 Control for coder

5.4 Plot