Quadrilateral learning experiments

Preliminaries.

rm(list=ls())
library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(ggplot2)
library(binom)
library(lme4)

## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following object is masked from 'package:tidyr':
## 
##     expand

library(bootstrap)
library(magrittr)

## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

library(stringr)

## for bootstrapping 95% confidence intervals
theta <- function(x,xdata,na.rm=T) {mean(xdata[x],na.rm=na.rm)}
ci.low <- function(x,na.rm=T) {
  mean(x,na.rm=na.rm) - quantile(bootstrap(1:length(x),1000,theta,x,na.rm=na.rm)$thetastar,.025,na.rm=na.rm)}
ci.high <- function(x,na.rm=T) {
  quantile(bootstrap(1:length(x),1000,theta,x,na.rm=na.rm)$thetastar,.975,na.rm=na.rm) - mean(x,na.rm=na.rm)}

theme_set(theme_bw())

Load data.

d1 <- read.csv("../data/experiment_1.tsv", header=TRUE, 
              sep="\t", row.names=NULL, stringsAsFactors = FALSE)
d2 <- read.csv("../data/experiment_2.tsv", header=TRUE, 
               sep="\t", row.names=NULL, stringsAsFactors = FALSE)
d3 <- read.csv("../data/experiment_3.tsv", header=TRUE, 
               sep="\t", row.names=NULL, stringsAsFactors = FALSE)

rd <- bind_rows(d1, d2, d3)

Constants:

Are all squares parallelograms?
Are all squares rectangles?
Are all squares also rhombuses?
Are all rectangles also parallelograms?
Are all rectangles also rhombuses?
Are all rectangles also squares?
Are all rhombuses also parallelograms?
Are all rhombuses also rectangles?
Are all rhombuses also squares?
Are all parallelograms also rectangles?
Are all parallelograms also rhombuses?
Are all parallelograms also squares?

questions <- c("sr","sh","sp","rs","rh","rp","hs","hr","hp","ps","pr","ph")

correct_answers <- c(1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0) 
answer_key <- 1 - correct_answers ## a yes answer is coded as "0" in the data

related <- list(square = which(grepl("s", questions)),
                rectangle = which(grepl("r", questions)),
                rhombus = which(grepl("h", questions)),
                parallel = which(grepl("p", questions)))

Data cleaning

shape numbers: 0 square, 1 rectangle, 2 rhombus, 3 parallelogram
conditions: 3 Active learning, 4 Passive learning, 5 Baseline condition

d <- rd %>% 
  select(workerid, starts_with("Answer")) %>%
  rename(pre = Answer.pretest_responses_by_presented_order,
         post = Answer.posttest_responses_by_presented_order,
         ord = Answer.permutations_used,
         time = Answer.training_time, 
         shape = Answer.shape_of_focus,
         cond = Answer.training_regime) %>%
  select(-starts_with("Answer")) %>%
  rowwise %>%
  do(data.frame(workerid = .$workerid, 
                pre = as.integer(strsplit(substr(.$pre, 2, 24), ",")[[1]]),                
                post = as.integer(strsplit(substr(.$post, 2, 24), ",")[[1]]),
                time = .$time, 
                # note zero indexing of orders, that's why +1                                           
                qnum = as.integer(strsplit(substr(.$ord, 2, 26), ",")[[1]]) + 1, 
                cond = .$cond,
                shape = .$shape)) %>%
  gather(phase, resp, pre, post) %>%
  mutate(cond = factor(cond, 
                       levels = c(3,4,5), 
                       labels = c("active", "teaching", "baseline")),
         shape = factor(shape, 
                        levels = c(0, 1, 2, 3), 
                        labels = c("square","rectangle","rhombus","parallel")),
         question = questions[qnum], 
         answer = answer_key[qnum],
         correct = resp == answer) %>%
  rowwise() %>%
  mutate(relevant = qnum %in% related[shape][[1]])

## Warning in rbind_all(out[[1]]): Unequal factor levels: coercing to
## character

Analysis

Between subjects

ms <- d %>% 
  group_by(cond, shape, phase, question, relevant) %>%
  summarise(m.cih = ci.high(correct),
            m.cil = ci.low(correct),            
            m = mean(correct))

## Warning: Grouping rowwise data frame strips rowwise nature

qplot(question, m, fill = phase, 
      ymin = m - m.cil, ymax = m + m.cih,
      position = position_dodge(width = .9),
      alpha = relevant, 
      facets = shape ~ cond, 
      data = ms,       
      geom = c("bar", "linerange"), stat = "identity")

Within subjects change scores

ms <- d %>% 
  select(-resp) %>% 
  spread(phase, correct) %>%  
  group_by(cond, shape, question, relevant) %>%
  summarise(m.cih = ci.high(post - pre),
            m.cil = ci.low(post - pre),            
            m = mean(post - pre))

qplot(question, m, fill = relevant,
      ymin = m - m.cil, ymax = m + m.cih,
      position = position_dodge(width = .9),
      facets = shape ~ cond, 
      data = ms,       
      geom = c("bar", "linerange"), stat = "identity")

Within subjects but aggregated across questions

ms <- d %>% 
  select(-resp) %>% 
  spread(phase, correct) %>%  
  group_by(workerid, cond, shape, relevant) %>%
  summarise(m = mean(post - pre)) %>%
  group_by(cond, shape, relevant) %>%
  summarise(m.cih = ci.high(m),
            m.cil = ci.low(m),
            m = mean(m), 
            n = n())

ms

## Source: local data frame [14 x 7]
## Groups: cond, shape
## 
##        cond     shape relevant      m.cih      m.cil             m  n
## 1    active rectangle    FALSE 0.03794192 0.03787879 -1.767677e-02 66
## 2    active rectangle     TRUE 0.03787879 0.04040404 -5.050505e-03 66
## 3    active   rhombus    FALSE 0.03340278 0.03062500  1.388889e-02 60
## 4    active   rhombus     TRUE 0.05277778 0.05000000 -2.777778e-03 60
## 5    active  parallel    FALSE 0.03961864 0.03954802 -1.129944e-02 59
## 6    active  parallel     TRUE 0.04802260 0.04802260  2.824859e-02 59
## 7  teaching rectangle    FALSE 0.05208333 0.05208333 -3.472222e-03 48
## 8  teaching rectangle     TRUE 0.04861111 0.05208333  2.777778e-02 48
## 9  teaching   rhombus    FALSE 0.02777778 0.02777778  1.010101e-02 66
## 10 teaching   rhombus     TRUE 0.04927399 0.05050505  5.176768e-02 66
## 11 teaching  parallel    FALSE 0.04232804 0.03968254  2.910053e-02 63
## 12 teaching  parallel     TRUE 0.05026455 0.05291005  2.910053e-02 63
## 13 baseline   rhombus    FALSE 0.06306306 0.05405405  4.954955e-02 37
## 14 baseline   rhombus     TRUE 0.06306306 0.05405405  7.445277e-19 37

qplot(cond, m, fill = relevant,
      ymin = m - m.cil, ymax = m + m.cih,
      position = position_dodge(width = .9),
      facets = . ~ shape, 
      data = ms,       
      geom = c("bar", "linerange"), stat = "identity")

Basic analysis over all questions - pretest only. Sanity check that we are getting reasonable answers?

ms <- d %>% 
  filter(phase == "pre") %>%
  group_by(workerid, question) %>%
  summarise(m = mean(1 - resp)) %>%
  group_by(question) %>%
  summarise(m.cih = ci.high(m),
            m.cil = ci.low(m),
            m = mean(m))

qplot(question, m, 
      ymin = m - m.cil, ymax = m + m.cih,
      position = position_dodge(width = .9),
      data = ms,       
      geom = c("bar", "linerange"), stat = "identity") + 
  geom_hline(yintercept = .5, lty = 2)

Quadrilateral learning experiments

Andres Gomez Emilsson and Mike Frank

May 17, 2015

Data cleaning

Analysis