mo— title: “Gaze-Xsit Adult” author: “Allison Dods & Kyle MacDonald” date: “February 2, 2016” output: html_document —

knitr::opts_chunk$set(echo=T, warning=F, cache=T, message=F, sanitize = T)

This script munges and analyzes the data for a project exploring the association between allocation of attention and cross-situational word learning in more or less ambiguous contexts.

rm(list = ls())
library(dplyr)
library(readr)
library(magrittr)
library(ggplot2)
library(tidyr)
library(knitr)
library(langcog)
library(lme4)
library(directlabels)
theme_set(theme_bw())

Load data and filter out unusable participants.

## output: d (mastersheet with times, trial, subject, condition, and gazes)
## input: processed data spreadsheet, subject info spreadsheet, trial info

#read in processed data
d <- read_csv("../data/processed_data/processed_adult_data.csv")

# join subject info
subinfo <- read_csv("../info/subinfo_adult_n.csv")

d %<>% 
  mutate(subid = gsub(".txt", "", subid)) %>%
  left_join(subinfo)
  
# join trial info
trialinfo <- read_csv("../info/trialinfo.csv")
d %<>% left_join(trialinfo, by = "stimulus")

# filter out unusable participants 

##calculate percentage of non-looks - that is, looking away from the screen entirely.
##offscreen == TRUE if x is 0 and y is 1050, so higher bad_looks are worse
d_screentime <- transform(d, offscreen = ((x == 0 & y == 1050) | is.na(x) | is.na(y))) %>%
  select(subid, t, offscreen) %>%
  group_by(subid) %>%
  summarise(bad_looks = sum(offscreen, na.rm = TRUE), total_timeslices = n())

d_screentime <- transform(d_screentime, percent_bad_looks = bad_looks/total_timeslices)


##filter out participants more than 1 standard dev away from mean screentime
a <- sd(d_screentime$percent_bad_looks)
b <- mean(d_screentime$percent_bad_looks)

d %<>%
  left_join(d_screentime)

d$keep <- ifelse(d$percent_bad_looks > (a+b), 'n', 'y')
d %<>% filter(keep == "y")

Some table or way of keeping track of how many participants were removed and for what reason.

We remove participants XX who: a)

Descriptives

Goal: get a table with the following information

d %>% 
    select(subid, condition) %>% 
    unique() %>% 
    group_by(condition) %>% 
    summarise(n_subs = n()) %>% 
    kable()
condition n_subs
gaze 10
nogaze 11
d_descrip <- d %>% 
    group_by(subid) %>%
    select(subid, shortname, trial.type) %>% 
    unique() %>% 
    group_by(subid) %>% 
    summarise(n_trials = n()) 

d_descrip %>% kable()
subid n_trials
020816_01 36
020916_01 36
020916_02 36
020916_03 36
021016_01 36
021016_02 36
021216_01 36
021216_02 36
021616_01 36
021616_03 36
021616_04 36
021716_01 36
021916_01 36
021916_02 36
021916_03 36
021916_04 36
021916_05 36
021916_06 36
021916_07 36
021916_08 36
051216_04 36

Data cleaning/munging

Here we label Areas of Interest (participant_looking_char) to be Left Picture, Right Picture, and the Center Face. We also add trial number variable.

KM question: where did these AOI numbers come from?

d %<>% mutate(participant_looking_factor = factor(ifelse(x > 210 & x < 750 & y < 540, "left", 
                           ifelse(x > 1170 & x < 1710 & y < 540, "right", 
                                  ifelse(x < 1230 & x > 690 & y > 540, "face", "away")))),
              participant_looking_char = as.character(participant_looking_factor))

# add trial numbers
trial_num <- c(c("fam", "fam", "fam", "fam"), seq(1,16))
d %<>% 
    select(subid, shortname) %>% 
    unique() %>% 
    group_by(subid) %>% 
    cbind(trial_num) %>% 
    mutate(trial_num = as.character(trial_num)) %>% 
    left_join(., d, by = c("subid", "shortname"))

Add correct column for exposure and test trials. A time slice is correct if the participant looked at the object that was kept from exposure to test.

We also add a column encoding whether the participant looked at the gaze target during exposure trials. This is only relevant for the gaze condition. In the no-gaze condition, each time slice will be False.

d %<>% mutate(correct = ifelse(target_object == participant_looking_char, TRUE, FALSE),
              gaze_follow = ifelse(look == participant_looking_char, TRUE, FALSE))

Trial level filtering

Remove trials with slow RTs?

Remove trials where ss did not follow gaze on exposure?

Munging to get performance on exposure trials

Next we create a data frame with just exposure trials for visualization and analysis.

First, we need to get a crosstabs of where particpants were looking during each trial. The target_object variable encodes the object that will appear again during test. It does not have to be the target of gaze.

Note the use of the complete() function from the tidyr package for completing missing combinations of data. It turns implicitly missing values into explicitly missing values.

# create exposure trial data frame
all.exp <- d %>% 
  filter((trial.type == "exposure" & 
           participant_looking_char %in% c("left", "right"))) %>%
    group_by(subid, trial_num, condition, participant_looking_char, trial.type) %>% 
    summarise(count = n()) %>% 
    ungroup() %>% 
    complete(subid, trial_num, participant_looking_char, 
             fill = list(count = 0)) %>%
    arrange(subid) %>% 
  filter(is.na(trial.type) == F) %>% 
    select(-condition) # here we remove condition since it has NAs after the complete function 

# add condition and where gaze was directed 
all.exp <- d %>% 
    filter(trial.type == "exposure" & participant_looking_char != "face") %>%
    select(subid, condition, trial_num, look) %>%
    unique() %>%
    left_join(all.exp, by = c("subid", "trial_num")) 

##double-check exposure trial numbers are consistent across participants
##all.exp group by participant subid & summarise n()

Get proportion correct and incorrect looking on exposure trials (looking at the object that will show up again at test) for each ss for each trial. We will use this to predict correct looking at test.

ss_correct_exposure <- d %>% 
    filter(trial.type == "exposure" & participant_looking_char %in% c("left", "right")) %>%
    group_by(subid, condition, trial_num = as.numeric(trial_num)) %>% 
    summarise(mean_accuracy_exp = mean(correct))

ggplot(aes(x=mean_accuracy_exp, fill = condition),
       data = ss_correct_exposure) +
    geom_density(alpha = 0.5) +
    facet_wrap(~condition) +
    xlab("Mean Accuracy Scores")

The distribution of correct looking is bimodal for the gaze condition and unimodal for the no-gaze condition. This makes sense and provides evidence that people were following gaze, and that they were distributing attention more broadly in the no-gaze condition.

Next we want to do a sanity check, that is, we want to make sure people were actually following gaze in the gaze condition.

ss_gf_exposure <- d %>% 
    filter(trial.type == "exposure", participant_looking_char %in% c("left", "right"), 
           condition == "gaze") %>%
    group_by(subid, condition, trial_num = as.numeric(trial_num)) %>% 
    summarise(mean_gf_exp = mean(gaze_follow))


ggplot(aes(x=mean_gf_exp),
       data = ss_gf_exposure) +
    geom_histogram() +
    xlab("Mean Gaze Following Scores") +
    ggtitle("Gaze Following on each Trial in Gaze Condition")

##think about people at low end; histogram

paste("Mean Gaze Following Score is:", round(mean(ss_gf_exposure$mean_gf_exp), 2))
## [1] "Mean Gaze Following Score is: 0.77"

Proportion gaze following on exposure trials in the gaze condition is around a mean of 77%. And the distribution is skewed heavily towards 1, meaning people followed gaze.

Munging to get performance on test trials

Next we want to get each participant’s target looking on test trials.

ss_acc_test <- d %>% 
    filter(trial.type == "test", participant_looking_char %in% c("left", "right")) %>%
    group_by(subid, condition, trial_num = as.numeric(trial_num)) %>% 
    summarise(mean_accuracy = mean(correct))

ggplot(aes(x=mean_accuracy),
       data = ss_acc_test) +
    geom_density() +
    xlab("Mean Accuracy") +
    facet_wrap(~condition)

Visualizations

Continuous accuracy scores

ss_acc_all <- d %>% 
    filter(trial.type != "familiar", participant_looking_char %in% c("left", "right")) %>%
    group_by(subid, condition, trial_num = as.numeric(trial_num), trial.type) %>% 
    summarise(mean_accuracy = mean(correct)) %>% 
    spread(trial.type, mean_accuracy)

# plot the relation between acc on exposure and acc on test
ggplot(aes(x = exposure, y = test, color = condition), data = ss_acc_all) +
    geom_smooth(method = "lm", se = T, size = 2) +
  scale_color_solarized() +
  geom_point(alpha = 0.6) 

Bin participants/trials based on exposure looking (same/switch analysis).

ss_acc_all$exptarget_bin <- cut(ss_acc_all$exposure, breaks = 4)
    
ms <- ss_acc_all %>% 
  group_by(subid, exptarget_bin, condition) %>%
  summarise(mean_acc = mean(test, na.rm = T)) %>% 
  group_by(exptarget_bin, condition) %>% 
  multi_boot_standard(column = "mean_acc", na.rm = T) %>% 
  filter(is.na(exptarget_bin) == F)

Plot.

ggplot(aes(x = exptarget_bin, y = mean, fill = condition), data = ms) +
    geom_bar(stat = "identity", position = position_dodge()) +
    geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), position = position_dodge(width = 0.9)) +
  scale_fill_solarized()

Model

Question: For each condition, does the amount of time looking at the object during exposure trials predict the participants’ success (higher accuracy) on test trials?

Define Same and Switch trials are inferred based on participant looking during exposure trials.

Predictions:

Models:

  1. Predicting looking behavior (proportion correct or proportion looking to the kept object) based on condition
  2. Predicting looking behavior based on trial type (Same/Switch binned based on looking behavior during exposure trials)
m1.cont <- lmer(test ~ exposure * condition + (1|subid), data = ss_acc_all)
kable(summary(m1.cont)$coef)
Estimate Std. Error t value
(Intercept) 0.4462660 0.0435020 10.258519
exposure 0.4751043 0.0547501 8.677682
conditionnogaze 0.0709338 0.0642570 1.103908
exposure:conditionnogaze -0.2747605 0.0936914 -2.932613

Categorical model with exposure looking binned into 4 groups.

m2.cat <- lmer(test ~ exptarget_bin * condition + (1|subid), data = ss_acc_all) 
kable(summary(m2.cat)$coef)
Estimate Std. Error t value
(Intercept) 0.4920768 0.0429886 11.4466903
exptarget_bin(0.25,0.5] 0.0675767 0.0616724 1.0957359
exptarget_bin(0.5,0.75] 0.3275893 0.0629629 5.2028972
exptarget_bin(0.75,1] 0.3753680 0.0462510 8.1158836
conditionnogaze 0.0141119 0.0660281 0.2137248
exptarget_bin(0.25,0.5]:conditionnogaze 0.0524680 0.0836933 0.6269079
exptarget_bin(0.5,0.75]:conditionnogaze -0.1976916 0.0852319 -2.3194560
exptarget_bin(0.75,1]:conditionnogaze -0.2155952 0.0797736 -2.7025877