library(ggplot2)
library(directlabels)
library(car)
library(lme4)
library(langcog)
library(dplyr)
library(knitr)
# library(plyr)
rm(list=ls())
theme_set(theme_mikabr() +
theme(panel.grid = element_blank(),
strip.background = element_blank()))
font <- "Open Sans"
data = read.csv("data/study1/Combined 2011 2012 coded data 11-25-13.csv")
Data cleaning.
data$rdif=round(data$difficulty)
data$size.01 <- ifelse(data$size > 2, 1, ifelse(data$size < 3, 0, NA))
data$g.present = ifelse(data$g.present==9, NA, data$g.present)
data = data[data$subnum!=2455,]
Proportion of trials containing gesture is 0.952529.
Now look at the the trials with no gesture.
length(data[data$g.present==0 & !is.na(data$g.present),]$g.present) #168 trials
## [1] 168
nog <- data[data$g.present==0 & !is.na(data$g.present),]
hist(nog$level)
sort(nog$level)
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [24] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [47] 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2
## [70] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [93] 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [116] 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 5
## [139] 5 5 5 5 5 5 5 5 5 6 6 6 6 6 7 8 9 9 9 9 10 11 11
## [162] 12 12 12 13 13 13 13
129 (77%) were on trials of level 3 or lower, meaning that there were only 2 addends in the problem.
consider the following variables: level, difficulty, # of moves,trial
hist(data$level)
hist(data$difficulty)
hist(data$moves)
hist(data$trial)
sizefull <- lmer(size ~ difficulty + moves + trial + log(level) +
(difficulty + moves + trial + log(level)| subnum),
data=data)
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 2 negative
## eigenvalues
And show coefficients in a nice table.
kable(summary(sizefull)$coefficients, digits = 3)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 1.958 | 0.122 | 16.055 |
difficulty | 0.036 | 0.019 | 1.939 |
moves | -0.004 | 0.004 | -1.179 |
trial | 0.001 | 0.002 | 0.812 |
log(level) | 0.186 | 0.058 | 3.233 |
and p-vals by t=z
2*(1-pnorm(abs((fixef(sizefull)/sqrt(diag(vcov(sizefull)))))))
## (Intercept) difficulty moves trial log(level)
## 0.000000000 0.052471109 0.238252612 0.416566441 0.001225578
Do we still see effects of difficulty and level? do these interact with coder? Yes and yes.
sizefull.coder <- lmer(size ~ difficulty + log(level) +
coder * difficulty + coder * level +
(difficulty + log(level) | subnum),
data=data)
kable(summary(sizefull.coder)$coefficients, digits = 3)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 2.630 | 0.246 | 10.713 |
difficulty | 0.091 | 0.039 | 2.373 |
log(level) | 0.288 | 0.071 | 4.081 |
coderCK | -1.086 | 0.205 | -5.308 |
coderES | -1.093 | 0.225 | -4.853 |
coderHF | -0.598 | 0.232 | -2.574 |
coderLT | -0.537 | 0.137 | -3.907 |
coderMH | -0.500 | 0.309 | -1.615 |
coderRESOLVED | -0.315 | 0.195 | -1.618 |
level | -0.101 | 0.037 | -2.748 |
difficulty:coderCK | -0.094 | 0.033 | -2.819 |
difficulty:coderES | -0.119 | 0.038 | -3.150 |
difficulty:coderHF | -0.042 | 0.041 | -1.041 |
difficulty:coderLT | -0.041 | 0.023 | -1.808 |
difficulty:coderMH | 0.032 | 0.054 | 0.590 |
difficulty:coderRESOLVED | -0.023 | 0.029 | -0.784 |
coderCK:level | 0.114 | 0.032 | 3.600 |
coderES:level | 0.110 | 0.035 | 3.145 |
coderHF:level | 0.089 | 0.035 | 2.523 |
coderLT:level | 0.071 | 0.023 | 3.120 |
coderMH:level | 0.043 | 0.048 | 0.882 |
coderRESOLVED:level | 0.031 | 0.030 | 1.036 |
Another approach: analyze coder as a random effect - this is maybe more correct?
sizefull.coder <- lmer(size ~ difficulty + moves + trial + log(level) +
(difficulty + moves + trial + log(level) | coder) +
(difficulty + moves + trial + log(level) | subnum),
data=data)
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 10 negative
## eigenvalues
kable(summary(sizefull.coder)$coefficients, digits = 3)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 2.048 | 0.228 | 8.993 |
difficulty | 0.052 | 0.167 | 0.309 |
moves | -0.007 | 0.089 | -0.076 |
trial | 0.000 | 0.189 | 0.000 |
log(level) | 0.233 | 0.175 | 1.328 |
size.allfour.binom <- glmer(size.01 ~ difficulty + moves + trial + log(level) +
(difficulty + moves + trial + log(level)| subnum),
data = data, family="binomial")
## Warning in optwrap(optimizer, devfun, start, rho$lower, control =
## control, : convergence code 1 from bobyqa: bobyqa -- maximum number of
## function evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
And show coefficients.
kable(summary(size.allfour.binom)$coefficients, digits = 3)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | -2.819 | 1.047 | -2.693 | 0.007 |
difficulty | 0.329 | 0.156 | 2.106 | 0.035 |
moves | -0.027 | 0.030 | -0.880 | 0.379 |
trial | -0.010 | 0.015 | -0.683 | 0.495 |
log(level) | 0.757 | 0.637 | 1.189 | 0.235 |
and p-vals by t=z
2*(1-pnorm(abs((fixef(size.allfour.binom)/sqrt(diag(vcov(size.allfour.binom)))))))
## (Intercept) difficulty moves trial log(level)
## 0.007081421 0.035196425 0.379124356 0.494586625 0.234561425
Size by level and difficulty graph
get median difficulty level by problem level
ms <- aggregate(difficulty ~ level + subnum, data=data, mean)
meds <- aggregate(difficulty ~ level, data=ms, median)
names(meds) <- c("level","med")
data2 <- merge(data, meds, by="level", all.x=T)
data <- data2
data$Difficulty_Category <- ifelse(data$difficulty < data$med,
"Below Median Difficulty (Easier)",
"At or Above Median Difficulty (Harder)")
More aggregation.
data$Subjective_Difficulty <- data$Difficulty_Category
ms1 <- aggregate(size ~ level + Subjective_Difficulty + subnum, data=data, mean)
ms2 <- aggregate(size ~ level + Subjective_Difficulty, data=ms1, mean)
data$Subjective_Difficulty <- factor(data$Subjective_Difficulty)
ms2$measure.type <- "Gesture Size"
ms2$measure <- ms2$size
ms <- bind_rows(ms2)
And plot.
qplot(level,measure, colour=Subjective_Difficulty,
label=c(Subjective_Difficulty),
data=ms,
ylab=c("Average Gesture Size"),
xlab=c("Problem Level (Number of Addends - 1)"), ylim=c(1,4)) +
geom_smooth() +
theme_bw(base_size=14) +
scale_color_brewer(palette="Set1")+
theme(legend.justification = c(0,0), legend.position=c(.6,.7))
And by coder.
ms1 <- aggregate(size ~ level + Subjective_Difficulty + subnum + coder, data=data, mean)
ms2 <- aggregate(size ~ level + Subjective_Difficulty + coder, data=ms1, mean)
data$Subjective_Difficulty <- factor(data$Subjective_Difficulty)
ms2$measure.type <- "Gesture Size"
ms2$measure <- ms2$size
ms <- bind_rows(ms2)
qplot(level,measure, colour=Subjective_Difficulty,
label=c(Subjective_Difficulty),
data=ms,
ylab=c("Average Gesture Size"),
xlab=c("Problem Level (Number of Addends - 1)"), ylim=c(1,4)) +
geom_smooth() +
facet_wrap(~coder) +
theme_bw(base_size=14) +
scale_color_brewer(palette="Set1")+
theme(legend.justification = c(0,0), legend.position="bottom")
all.cor <- data[data$correct==1,]
all.cor$rdif <- round(all.cor$difficulty)
all.cor$mean <- (all.cor$movesmin + all.cor$movesmax)/2
all.cor$prop.moves2 <- all.cor$mean / all.cor$moves
Histograms.
qplot(prop.moves2, data=all.cor)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1212 rows containing non-finite values (stat_bin).
Square root transform to increase normality.
qplot(sqrt(prop.moves2), data=all.cor)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1212 rows containing non-finite values (stat_bin).
Prop moves by difficulty model.
prop6 <- lmer(sqrt(prop.moves2) ~ difficulty + moves + trial+level+
(1+difficulty + moves + trial+level|subnum), data = all.cor) # prop.moves by difficulty
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
And show coefficients.
kable(summary(prop6)$coefficients, digits = 3)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 0.956 | 0.037 | 25.999 |
difficulty | 0.014 | 0.006 | 2.319 |
moves | -0.010 | 0.002 | -6.633 |
trial | 0.001 | 0.000 | 2.196 |
level | 0.002 | 0.008 | 0.195 |
and p-vals by t=z
round(2*(1-pnorm(abs((fixef(prop6)/sqrt(diag(vcov(prop6))))))), digits=2)
## (Intercept) difficulty moves trial level
## 0.00 0.02 0.00 0.03 0.85
prop6.nosqrt <- lmer(prop.moves2 ~ difficulty + moves + trial+level+
(1+difficulty + moves + trial+level|subnum), data = all.cor)
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 1.72672 (tol =
## 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
kable(summary(prop6.nosqrt)$coefficients, digits = 3)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 0.935 | 0.057 | 16.482 |
difficulty | 0.018 | 0.009 | 1.915 |
moves | -0.014 | 0.003 | -5.411 |
trial | 0.002 | 0.001 | 2.082 |
level | -0.004 | 0.013 | -0.348 |
round(2*(1-pnorm(abs((fixef(prop6.nosqrt)/sqrt(diag(vcov(prop6.nosqrt))))))), digits =2)
## (Intercept) difficulty moves trial level
## 0.00 0.06 0.00 0.04 0.73
Another approach: analyze coder as a random effect - this is maybe more correct?
Doesn’t converge with full predictor set (likely due to sparsity of trials by coder).
prop6.coder <- lmer(prop.moves2 ~ difficulty + moves + trial +
(1 | coder) +
(difficulty + moves + trial | subnum),
data = all.cor)
## Warning in optwrap(optimizer, devfun, getStart(start, rho$lower, rho$pp), :
## convergence code 1 from bobyqa: bobyqa -- maximum number of function
## evaluations exceeded
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
kable(summary(prop6.coder)$coefficients, digits = 3)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 0.935 | 0.047 | 19.881 |
difficulty | 0.018 | 0.008 | 2.198 |
moves | -0.015 | 0.002 | -9.803 |
trial | 0.002 | 0.001 | 2.034 |
round(2*(1-pnorm(abs((fixef(prop6.coder)/sqrt(diag(vcov(prop6.coder))))))), digits= 2)
## (Intercept) difficulty moves trial
## 0.00 0.03 0.00 0.04
at each level, split problems into harder and easier
ms <- aggregate(difficulty ~ moves + subnum, data=data, mean)
ms1 <- aggregate(difficulty ~ moves, data=ms, median)
names(ms1) <- c("moves","mdif")
all.cor2 <- merge(all.cor, ms1, by="moves", all.x=T)
all.cor <- all.cor2
all.cor$levdif <- ifelse(all.cor$difficulty < all.cor$mdif, "Below Median (Easier)", "At or Above Median (Harder)")
prop moves by # of moves and high/low dif
all.cor$Difficulty_Category <- all.cor$levdif
all.cor$Subjective_Difficulty <- all.cor$Difficulty_Category
ms <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty + subnum, data=all.cor, mean)
ms1 <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty, data=ms, mean)
ms1$measure.type <- "Proportion of Moves"
ms1$measure <- ms1$prop.moves2
ms <- bind_rows(ms1)
qplot(moves,measure, colour=Subjective_Difficulty,label=c("Subjective Difficulty"), data=ms, ylab=c("Proportion of Steps Gestured"), xlab=c("Number of Steps Required"), ylim=c(0,1.5)) +
# facet_grid(measure.type ~ .) +
geom_smooth() +
theme_bw(base_size=14) +
scale_color_brewer(palette="Set1")+
theme(legend.justification = c(0,0), legend.position=c(.6,.7))
By coder.
ms <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty + subnum + coder, data=all.cor, mean)
ms1 <- aggregate(prop.moves2 ~ moves + Subjective_Difficulty + coder, data=ms, mean)
ms1$measure.type <- "Proportion of Moves"
ms1$measure <- ms1$prop.moves2
ms <- bind_rows(ms1)
qplot(moves,measure, colour=Subjective_Difficulty,label=c("Subjective Difficulty"), data=ms, ylab=c("Proportion of Steps Gestured"), xlab=c("Number of Steps Required"), ylim=c(0,1.5)) +
facet_grid(coder ~ .) +
geom_smooth() +
theme_bw(base_size=14) +
scale_color_brewer(palette="Set1")+
theme(legend.justification = c(0,0), legend.position=c(.6,.7))
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 28.925
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 10.075
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 36.906
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.
## fewer data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 28.925
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 10.075
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 36.906
## Warning: Removed 2 rows containing missing values (geom_point).