library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
## ✔ tibble  1.3.4     ✔ dplyr   0.7.4
## ✔ tidyr   0.7.2     ✔ stringr 1.2.0
## ✔ readr   1.1.1     ✔ forcats 0.2.0
## Warning: package 'tidyr' was built under R version 3.4.2
## Warning: package 'dplyr' was built under R version 3.4.2
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggthemes)
library(langcog)
## 
## Attaching package: 'langcog'
## The following objects are masked from 'package:ggthemes':
## 
##     scale_color_solarized, scale_colour_solarized,
##     scale_fill_solarized
## The following object is masked from 'package:base':
## 
##     scale
library(knitr)

A couple of differences here from the earlier code. First, always use relative paths. Second, it’s a little bit better to create a single data frame in “tidy” format that you can use for everything.

## importing data
# Read both data files (paths are relative to the working directory),
# stack them into one data frame, and tag each row as a "train" or a
# "test" trial based on the `trial` column.
d <- c("pref.data.csv", "novel.data.csv") %>%
  map(read_csv) %>%
  bind_rows() %>%
  mutate(trial_type = ifelse(trial == "train", "train", "test"))
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_integer(),
##   id = col_character(),
##   alltrial = col_integer(),
##   condition = col_character(),
##   agent = col_character(),
##   altAgent = col_character(),
##   leftObject = col_character(),
##   rightObject = col_character(),
##   pick = col_character(),
##   target = col_character(),
##   trial = col_character(),
##   change = col_character(),
##   rt = col_integer(),
##   correct = col_integer()
## )
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_integer(),
##   id = col_character(),
##   alltrial = col_integer(),
##   condition = col_character(),
##   agent = col_character(),
##   altAgent = col_character(),
##   leftObject = col_character(),
##   rightObject = col_character(),
##   pick = col_character(),
##   target = col_character(),
##   trial = col_character(),
##   change = col_character(),
##   rt = col_integer(),
##   correct = col_integer()
## )

One advantage is that you can use summarise and kable to create sanity checks that tell you about the whole dataset.

# Sanity check over the whole dataset: for every trial type x condition
# cell, count the distinct ids and compute the mean of `correct`.
d %>%
  group_by(trial_type, condition) %>%
  summarise(
    n = n_distinct(id),
    correct = mean(correct)
  ) %>%
  knitr::kable(digits = 2)
trial_type condition n correct
test novelty 40 0.71
test preference 40 0.80
train novelty 40 0.98
train preference 40 1.00

This plot is equivalent to the earlier one.

# Test-trial accuracy by speaker change, one facet per condition.
# Bars show the mean of `correct`; error bars come from mean_cl_normal,
# which stat_summary computes over individual rows (trials).
ggplot(filter(d, trial_type == "test"),
       aes(x = change, y = correct, fill = change, frame = condition)) +
  # y is inherited from the plot-level aes, so it is not repeated here
  stat_summary_bin(fun.y = "mean", geom = "bar") +
  # linerange has no `width` parameter; passing one only triggers an
  # "Ignoring unknown parameters" warning, so it is omitted
  stat_summary(fun.data = "mean_cl_normal", colour = "black",
               size = 1, geom = "linerange", show.legend = FALSE) +
  facet_wrap(~ condition,
             labeller = as_labeller(c(`novelty` = "Novelty",
                                      `preference` = "Preference"))) +
  # dashed reference line at 0.5 (presumably chance level -- confirm)
  geom_hline(yintercept = 0.5, size = 0.5, lty = 2) +
  labs(x = "", y = "Proportion Expected Choice") +
  ylim(0, 1.05) +
  guides(fill = guide_legend(keywidth = 2, keyheight = 2)) +
  scale_fill_solarized(name = "Speaker Change",
                       breaks = c("false", "true"),
                       labels = c("No", "Yes")) +
  # theme_few() is a complete theme and resets any earlier theme() tweaks,
  # so the axis overrides are applied once, after it
  theme_few(base_size = 12) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

But there is an issue with the error bars: stat_summary knows neither that the data are binary nor what the unit of grouping is for the observations. The second point means that it’s not taking into account that you want error bars across subjects, which will be more conservative. The workflow below with langcog::multi_boot_standard does a slightly better job - note that the CI on the speaker change in the novelty condition looks pretty different.

# Collapse test trials to one mean per id within each change x condition
# cell, then let langcog::multi_boot_standard() summarise those per-id
# means; its output supplies the mean / ci_lower / ci_upper columns
# plotted below.
ms <- filter(d, trial_type == "test") %>%
  group_by(change, condition, id) %>%
  summarise(correct = mean(correct)) %>%
  multi_boot_standard(col = "correct")

# Bars are the group means, vertical ranges the confidence intervals,
# and the dashed line marks 0.5.
ggplot(ms, aes(x = change, y = mean, fill = change, frame = condition)) +
  geom_col() +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_hline(yintercept = 0.5, linetype = 2) +
  facet_wrap(~ condition) +
  scale_fill_solarized() +
  theme_few()

Order effects. Not sure what to make of this, but always good to look.

# Order effects: same by-id aggregation as before, but additionally
# split by `trial`, so each test trial gets its own mean and interval.
# Note that this overwrites the earlier `ms`.
ms <- d %>%
  filter(trial_type == "test") %>%
  group_by(change, condition, trial, id) %>%
  summarise(correct = mean(correct)) %>%
  multi_boot_standard(col = "correct")

# The line and the point ranges use the same dodge so that each line
# passes through the points it connects; dodging only the points would
# leave them horizontally offset from their line.
ggplot(ms,
       aes(x = trial, y = mean, col = change)) +
  geom_line(aes(group = change),
            position = position_dodge(width = .1)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper),
                  position = position_dodge(width = .1)) +
  # dashed reference line at 0.5
  geom_hline(yintercept = 0.5, lty = 2) +
  facet_wrap(~ condition) +
  theme_few() +
  scale_colour_solarized()