Experiment 1 – Pilot

Date run: 11/20/2018

Experiment structure:

  1. First, participants rated the likelihood of success for each board used in the experiment (1/6, 3/6, 5/6; referred to as baseline trials).

  2. Second, participants learned about the task and answered some check questions (referred to as demo trials).

  3. Third, for the bulk of the study, participants underwent 36 unique trials with agents who attempted different dart boards and then rated the likelihood of each agent’s success on a dart board (referred to as test trials; 1 = definitely fail, 100 = definitely succeed).

  4. Finally, participants filled out a demographics survey.

Trial information:

The 36 trials varied with respect to:

  • Difficulty of observed dart board (1/6, 3/6, 5/6; 1/6 is the most difficult board)
  • Observed outcome (fail or success; outcome kept constant for each agent)
  • Amount of observations (1 attempt, 3 attempts)
  • Difficulty of new board (1/6, 3/6, 5/6)
  • Trial order and agent randomized
  • Half of the agents were male, half were female

Data structure:

  • order_baseline: order of baseline trials
  • order_demo: order of demo trials
  • order_trials: order of test trials
  • responses_baseline: responses for baseline trials
  • responses_demo: respones for check questions in beginning
  • responses_rating: responses for ratings on test trials
  • responses_observation: responses for check questions on observation pages

** Link to task: http://web.stanford.edu/~masaba/204project/difficulty_task.html **

** Link to model: https://github.com/masaba/darts_difficulty/blob/master/model_humanpriors **

Load Libraries

library(ggplot2)
library(lme4)
library(tidyverse)
library(readxl)
library(langcog) # https://github.com/langcog/langcog-package
library(psych)
library(viridis) #creating colorblind-friendly fills
library(gridExtra)
library(lmerTest)
library(compute.es)
library(metafor)
library(rjson)
library(stats)

Load Data

Demographics data frame.

setwd("../exp1/mturk/production-results/")

files = list.files(pattern = '*.json')

demog.data.raw <- data.frame() 

for (f in 1:length(files)) {
  #read in file
  jd <- fromJSON(file = files[f])
  
  demog.data.subj <- data.frame(
                          id = jd$AssignmentId, #don't use worker ID!
                          age = jd$answers$data$demographics[[1]][[1]]$value,
                          lang = jd$answers$data$demographics[[1]][[2]]$value,
                          race = jd$answers$data$demographics[[1]][[3]]$value,
                          gender = jd$answers$data$demographics[[1]][[4]]$value,
                          colorblind =jd$answers$data$demographics[[1]][[5]]$value,
                          focus = jd$answers$data$demographics[[1]][[6]]$value,
                          issues = jd$answers$data$demographics[[1]][[7]]$value)
  
  
  demog.data.raw <- bind_rows(demog.data.raw, demog.data.subj)
}

Baseline ratings data frame.

setwd("../exp1/mturk/production-results/")

files = list.files(pattern = '*.json')

baseline.data.raw <- data.frame() 

for (f in 1:length(files)) {
  #read in file
  jd <- fromJSON(file = files[f])
  
  baseline.data.subj <- data.frame(
                          id = jd$AssignmentId, #don't use worker ID!
                          trial = jd$answers$data$order_baseline,
                          responses = jd$answers$data$responses_baseline)
  
  
  baseline.data.raw <- bind_rows(baseline.data.raw, baseline.data.subj)
}

Demo check questions data frame.

#get data from ver1 participants
setwd("../exp1/mturk/production-results/")

files = list.files(pattern = '*.json')

check.data.raw <- data.frame() 

for (f in 1:length(files)) {
  #read in file
  jd <- fromJSON(file = files[f])
  
  check.data.subj <- data.frame(
                          id = jd$AssignmentId, #don't use worker ID!
                          order_demo = jd$answers$data$order_demo,
                          responses_demo = jd$answers$data$responses_demo)
  
  check.data.raw <- bind_rows(check.data.raw, check.data.subj)
}

Observation check questions data frame.

#get data from ver1 participants
setwd("../exp1/mturk/production-results/")

files = list.files(pattern = '*.json')

obs.data.raw <- data.frame() 

for (f in 1:length(files)) {
  #read in file
  jd <- fromJSON(file = files[f])
  
  obs.data.subj <- data.frame(
                          id = jd$AssignmentId, #don't use worker ID!
                       #   order_observation = jd$answers$data$order_observation,
                          responses_observation = jd$answers$data$responses_observation,
                          responses_observation_correct = jd$answers$data$correct_observation_responses)
  
  obs.data.raw <- bind_rows(obs.data.raw, obs.data.subj)
}

Ratings data frame.

#get data from ver1 participants
setwd("../exp1/mturk/production-results/")

files = list.files(pattern = '*.json')

rate.data.raw <- data.frame() 

for (f in 1:length(files)) {
  #read in file
  jd <- fromJSON(file = files[f])
  
  rate.data.subj <- data.frame(
                          id = jd$AssignmentId, #don't use worker ID!
                          trial = jd$answers$data$order_trials,
                          rating = jd$answers$data$responses_rating)

  rate.data.raw <- bind_rows(rate.data.raw, rate.data.subj)
}

Model predictions.

means = read.csv("means.csv")

Check Questions

Summarize demo check questions responses.

check.table = check.data.raw %>%
  mutate(correct = order_demo == responses_demo) %>%
  filter(correct == FALSE) %>%
  summarize(id = id,
            correct = sum(correct))

check.table
##                               id correct
## 1 338JKRMM27CG143XB0AER8F02S9AH1       0

1 person failed one of the intro/demo check questions.

Summarize observation check questions responses.

obs.table = obs.data.raw %>%
  mutate(responses_observation = factor(responses_observation, levels = c("fail","success"), labels = c("fail", "succ"))) %>%
  mutate(correct = responses_observation == responses_observation_correct) %>%
  filter(correct == FALSE) %>%
  group_by(id) %>%
  summarize(n = n(),
            corect = sum(correct))

obs.table
## # A tibble: 7 x 3
##   id                                 n corect
##   <chr>                          <int>  <int>
## 1 32XVDSJFP0ADOF94PIXCDV4Y7QGM2F     1      0
## 2 339ANSOTR6FM9CN3T95OLYJB7HIKIJ    10      0
## 3 3FDJT1UU75LAXOSOMPMUWPF62VCK59     1      0
## 4 3FTF2T8WLSVKTBOHETIEWGE3BZLW9R     1      0
## 5 3OS4RQUCRAS16IHJMOMAJYNT4VHFB3     1      0
## 6 3TYCR1GOTDWJO8UVED5B0TZGSUPLZK     1      0
## 7 3WT783CTPCUU36X9VMW9BS2Q7IPCBP     1      0

1 person missed 10 of these questions; 6 people missed 1.

Tidy Data

Tidy baseline data.

base.tidy = baseline.data.raw %>%
  mutate(responses = as.numeric(responses),
         rate_diff = factor(trial, levels=c("baseline_5","baseline_3","baseline_1"), labels=c("5/6", "3/6", "1/6")),
         amount = "baseline") %>%
  select(id, rate_diff, amount, responses) %>%
  filter(id != "339ANSOTR6FM9CN3T95OLYJB7HIKIJ", # remove person who failed 10 observation check questions
         id != "338JKRMM27CG143XB0AER8F02S9AH1") # remove person who failed intro check question)

Tidy ratings data.

rate.tidy = rate.data.raw %>%
  mutate(trial = str_sub(trial, 6, str_length(trial)), # remove names
         obs_diff = str_sub(trial, 1, 1),
         rate_diff = str_sub(trial, 3, 3),
         amount = str_sub(trial, 5, 5),
         outcome = str_sub(trial, 7, str_length(trial))) %>%
  mutate(id = factor(id),
         obs_diff = factor(obs_diff, levels=c(5, 3, 1), labels=c("5/6","3/6","1/6")),
         rate_diff = factor(rate_diff, levels=c(5, 3, 1), labels=c("5/6","3/6","1/6")),
         amount = factor(amount, levels=c(1, 3), labels = c("1 Observation", "3 Observations")),
         outcome = factor(outcome, levels=c("succ","fail"), labels=c("success","fail"))) %>%
  filter(id != "339ANSOTR6FM9CN3T95OLYJB7HIKIJ", # remove person who failed 10 observation check questions
         id != "338JKRMM27CG143XB0AER8F02S9AH1") # remove person who failed intro check question

# df for rating board difficulty 1
rate.tidy.hard = filter(rate.tidy, rate_diff == "hard")

# df for rating board difficulty 3
rate.tidy.med = filter(rate.tidy, rate_diff == "medium")

# df for rating board difficulty 5
rate.tidy.easy = filter(rate.tidy, rate_diff == "easy")

#join bsaeline data with ratings data
rate.base.tidy = full_join(rate.tidy, base.tidy)
## Joining, by = c("id", "rate_diff", "amount")
## Warning: Column `id` joining factor and character vector, coercing into
## character vector
## Warning: Column `amount` joining factor and character vector, coercing into
## character vector
rate.tidy.success = filter(rate.tidy, outcome == "success")

Demographics

dems = demog.data.raw %>%
  filter(id != "339ANSOTR6FM9CN3T95OLYJB7HIKIJ", # remove person who failed 10 observation check questions
         id != "338JKRMM27CG143XB0AER8F02S9AH1") %>% # remove person who failed 1 intro check question
  mutate(age = as.numeric(age)) %>%
#  group_by(id) %>%
  summarize(n=n(),
            mean_age = mean(age),
            sd_age = sd(age),
            min_age = min(age),
            max_age = max(age),
            female = sum(gender == "female"))

dems
##    n mean_age   sd_age min_age max_age female
## 1 98 36.19388 11.16018      21      72     31

Plots

Baseline difficulty judgments

# Histogram
ggplot(base.tidy, aes(x = responses, fill = rate_diff)) +
  geom_histogram(binwidth=10) +
  facet_grid(~ rate_diff) +
  theme_minimal() +
  ggtitle("Histogram of baseline success ratings") +
  xlab("Responses (1 = Definitely Fail; 100 = Definitely Succeed")

# Density plot
ggplot(base.tidy) +
  geom_density(aes(x = responses, fill =rate_diff)) +
  #facet_grid(~ board) +
  theme_minimal() +
  ggtitle("Density plots of baseline success ratings")+
  xlab("Responses (1 = Definitely Fail; 100 = Definitely Succeed)") +
  facet_grid (~rate_diff) +
  scale_color_brewer(palette="Dark2",direction=-1)

# Boxplot
ggplot(base.tidy, aes(x = rate_diff, y=responses,color=rate_diff)) +
  geom_boxplot(width=.5) +
  coord_cartesian(ylim=c(1,100)) +
  theme_minimal() +
  xlab("Board Type") +
  ylab("Rating (1-100)") +
  ggtitle("Boxplot of baseline success ratings")+
  xlab("Responses (1 = Definitely Fail; 100 = Definitely Succeed")

Ratings boxplots facetted by the difficulty of the rating board.

ggplot(rate.tidy, aes(x = obs_diff, y=as.numeric(rating),color=outcome)) +
  #geom_violin() +
  geom_boxplot(width=.5) +
  facet_grid(amount ~ rate_diff) +
  theme_minimal() +
  xlab("Difficulty of Observed Board") +
  ylab("Rating (1-100)") +
    coord_cartesian(ylim=c(1,100)) +
#  theme(legend.title = "Observed Board") +
  ggtitle("Likelihood of success on each board") +
  theme(panel.spacing = unit(2, "lines")) +
  scale_color_brewer(palette="Dark2")

Density plots.

ggplot(rate.tidy) +
  geom_density(aes(x=round(as.numeric(rating),0),fill=obs_diff),alpha=.4) +
  facet_grid(outcome ~ rate_diff) +
  theme_minimal() +
  xlab("Rating") +
  ylab("Density")

  #ggtitle("Likelihood of success on 5/6 (easy) board") +
 # scale_color_brewer(palette="Dark2")

#only success
ggplot(rate.tidy.success) +
  geom_density(aes(x=as.numeric(rating),fill=amount),alpha=.4) +
  facet_grid(obs_diff ~ rate_diff) +
  xlab("Rating") +
  ylab("Density") +
 # scale_fill_manual(c("purple2","lightgoldenrod2")) +
  scale_fill_brewer(type = 'div', palette = 'Set3', direction = -1) +
    theme_minimal()

  #ggtitle("Likelihood of success on 5/6 (easy) board") +
 # scale_color_brewer(palette="Dark2")

Correlation plot.

means$rate_diff = factor(means$rate_diff, levels=c("easy","medium","hard"))

ggplot(means, aes( x=model_empirical, y=participants,colour =rate_diff)) +
  geom_point() +
  theme_minimal() +
  ylab("Participants") +
  xlab("Model Predictions (Empirical priors)") +
#  ylim(0,1) +
#  geom_abline(intercept = 0, slope = 1) +
  geom_smooth(method="lm", se = FALSE) +
  scale_y_continuous(breaks=seq(0, 1.2, 0.25)) +
  scale_x_continuous(breaks=seq(0, 1.2, 0.25)) +
  xlim(0,1) +
  guides(fill=FALSE) +
  scale_fill_brewer(type = 'div', palette = 'Set3', direction = -1) +
      theme(text = element_text(size=20)) +
  ylim(0,1)
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.

ggplot(means, aes(x=model_manual, y=participants,colour =rate_diff)) +
  geom_point() +
  theme_minimal() +
  ylab("Participants") +
  xlab("Model Predictions (manual priors)") +
#  ylim(0,1) +
#  geom_abline(intercept = 0, slope = 1) +
  geom_smooth(method="lm", se = FALSE) +
  scale_y_continuous(breaks=seq(0, 1.2, 0.25)) +
  scale_x_continuous(breaks=seq(0, 1.2, 0.25)) +
  xlim(0,1) +
  guides(fill=FALSE) +
  scale_fill_brewer(type = 'div', palette = 'Set3', direction = -1) +
      theme(text = element_text(size=20)) +
  ylim(0,1)
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.

ggplot(means, aes( x=model_hmc, y=participants,colour =rate_diff)) +
  geom_point() +
  theme_minimal() +
  ylab("Participants") +
  xlab("Model Predictions (inferred priors)") +
#  ylim(0,1) +
#  geom_abline(intercept = 0, slope = 1) +
  geom_smooth(method="lm", se = FALSE) +
  scale_y_continuous(breaks=seq(0, 1.2, 0.25)) +
  scale_x_continuous(breaks=seq(0, 1.2, 0.25)) +
  xlim(0,1) +
  guides(fill=FALSE) +
  scale_fill_brewer(type = 'div', palette = 'Set3', direction = -1) +
      theme(text = element_text(size=20)) +
  ylim(0,1)
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.

Plots of different beta distributions.

p = seq(0,1, length=100)

# Beta distributions for empirical priors
 
plot(p, dbeta(p, 18.327339,1.016413), ylab="density", xlab = "rating", type ="l", col=3)
lines(p, dbeta(p, 5.300114,3.772161), col=2) 
lines(p, dbeta(p, 1.135, 4.949),  type ="l", col=4)

# Beta distributions for manual priors

plot(p, dbeta(p, 10,5), ylab="density", xlab = "rating", type ="l", col=3)
lines(p, dbeta(p, 5,3.75), col=2) 
lines(p, dbeta(p, 2,5),  type ="l", col=4)

# Beta distributions for inferred priors

plot(p, dbeta(p, 8,2.5), ylab="density", xlab = "rating", type ="l", col=3)
lines(p, dbeta(p, 5.5,3.5), col=2) 
lines(p, dbeta(p, 4.25,9),  type ="l", col=4)

Tables

rate.means = rate.tidy %>%
  group_by(trial, obs_diff, rate_diff, outcome, amount) %>%
  summarize(mean = mean(as.numeric(rating)))

write.csv(rate.means, "participant_means.csv")

Analyses

Baseline ratings.

# Linear regression 
summary(lm(responses ~ rate_diff, data=base.tidy))
## 
## Call:
## lm(formula = responses ~ rate_diff, data = base.tidy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.282  -7.562  -0.452   4.848  77.296 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    95.152      1.338   71.14   <2e-16 ***
## rate_diff3/6  -36.670      1.892  -19.39   <2e-16 ***
## rate_diff1/6  -78.248      1.892  -41.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.24 on 291 degrees of freedom
## Multiple R-squared:  0.8548, Adjusted R-squared:  0.8538 
## F-statistic: 856.6 on 2 and 291 DF,  p-value: < 2.2e-16
baseline.means = base.tidy %>%
  group_by(rate_diff) %>%
  multi_boot_standard(col = "responses")
baseline.means
## # A tibble: 3 x 4
## # Groups:   rate_diff [?]
##   rate_diff ci_lower ci_upper  mean
##   <fct>        <dbl>    <dbl> <dbl>
## 1 5/6           94.2     96.1  95.2
## 2 3/6           55.3     61.6  58.5
## 3 1/6           13.9     20.3  16.9
# Bartlett's test of variance
bartlett.test(responses ~ rate_diff, data=base.tidy)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  responses by rate_diff
## Bartlett's K-squared = 124.91, df = 2, p-value < 2.2e-16
easy.board = filter(base.tidy, base.tidy$rate_diff == "5/6")
medium.board = filter(base.tidy, base.tidy$rate_diff == "3/6")
hard.board = filter(base.tidy, base.tidy$rate_diff == "1/6")
var(easy.board$responses)
## [1] 23.41345
var(medium.board$responses)
## [1] 248.4238
var(hard.board$responses)
## [1] 254.1792

Test ratings.

#this is where BDA will go?
summary(lmer(as.numeric(rating) ~ obs_diff + rate_diff + amount + outcome + (1|id), rate.tidy))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: as.numeric(rating) ~ obs_diff + rate_diff + amount + outcome +  
##     (1 | id)
##    Data: rate.tidy
## 
## REML criterion at convergence: 31144.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.1681 -0.5965  0.0036  0.6288  4.0165 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept)  14.42    3.797  
##  Residual             391.80   19.794  
## Number of obs: 3528, groups:  id, 98
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            81.2683     0.9615 1046.5847  84.519   <2e-16 ***
## obs_diff3/6            11.3105     0.8169 3424.0000  13.846   <2e-16 ***
## obs_diff1/6            21.7254     0.8169 3424.0000  26.596   <2e-16 ***
## rate_diff3/6          -18.5839     0.8167 3424.0000 -22.756   <2e-16 ***
## rate_diff1/6          -41.0745     0.8169 3424.0000 -50.283   <2e-16 ***
## amount3 Observations   -0.6407     0.6668 3424.0000  -0.961    0.337    
## outcomefail           -39.8488     0.6668 3424.0000 -59.765   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ob_3/6 ob_1/6 rt_3/6 rt_1/6 amnt3O
## obs_diff3/6 -0.416                                   
## obs_diff1/6 -0.425  0.500                            
## rate_dff3/6 -0.438  0.000  0.022                     
## rate_dff1/6 -0.417 -0.031 -0.011  0.499              
## amnt3Obsrvt -0.343 -0.009 -0.017  0.003  0.020       
## outcomefail -0.351  0.009  0.007  0.017 -0.007 -0.008

Correlations to model.

# Correlation with empirical priors
cor.test(means$participants, means$model_empirical)
## 
##  Pearson's product-moment correlation
## 
## data:  means$participants and means$model_empirical
## t = 11.974, df = 34, p-value = 9.558e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8096904 0.9476793
## sample estimates:
##       cor 
## 0.8990641
# Correlation with manual priors
cor.test(means$participants, means$model_manual)
## 
##  Pearson's product-moment correlation
## 
## data:  means$participants and means$model_manual
## t = 20.564, df = 34, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9263218 0.9806484
## sample estimates:
##       cor 
## 0.9620707
# Correlation with hmc inferred priors
cor.test(means$participants, means$model_hmc)
## 
##  Pearson's product-moment correlation
## 
## data:  means$participants and means$model_hmc
## t = 25.85, df = 34, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9520828 0.9875374
## sample estimates:
##       cor 
## 0.9754914