Date run: 11/20/2018
Experiment structure:
First, participants rated the likelihood of success for each board used in the experiment (1/6, 3/6, 5/6; referred to as baseline trials).
Second, participants learned about the task and answered some check questions (referred to as demo trials).
Third, for the bulk of the study, participants underwent 36 unique trials with agents who attempted different dart boards and then rated the likelihood of each agent’s success on a dart board (referred to as test trials; 1 = definitely fail, 100 = definitely succeed).
Finally, participants filled out a demographics survey.
Trial information:
The 36 trials varied with respect to:
Data structure:
** Link to task: http://web.stanford.edu/~masaba/204project/difficulty_task.html **
** Link to model: https://github.com/masaba/darts_difficulty/blob/master/model_humanpriors **
library(ggplot2)
library(lme4)
library(tidyverse)
library(readxl)
library(langcog) # https://github.com/langcog/langcog-package
library(psych)
library(viridis) #creating colorblind-friendly fills
library(gridExtra)
library(lmerTest)
library(compute.es)
library(metafor)
library(rjson)
library(stats)Demographics data frame.
setwd("../exp1/mturk/production-results/")
files = list.files(pattern = '*.json')
demog.data.raw <- data.frame()
for (f in 1:length(files)) {
#read in file
jd <- fromJSON(file = files[f])
demog.data.subj <- data.frame(
id = jd$AssignmentId, #don't use worker ID!
age = jd$answers$data$demographics[[1]][[1]]$value,
lang = jd$answers$data$demographics[[1]][[2]]$value,
race = jd$answers$data$demographics[[1]][[3]]$value,
gender = jd$answers$data$demographics[[1]][[4]]$value,
colorblind =jd$answers$data$demographics[[1]][[5]]$value,
focus = jd$answers$data$demographics[[1]][[6]]$value,
issues = jd$answers$data$demographics[[1]][[7]]$value)
demog.data.raw <- bind_rows(demog.data.raw, demog.data.subj)
}Baseline ratings data frame.
setwd("../exp1/mturk/production-results/")
files = list.files(pattern = '*.json')
baseline.data.raw <- data.frame()
for (f in 1:length(files)) {
#read in file
jd <- fromJSON(file = files[f])
baseline.data.subj <- data.frame(
id = jd$AssignmentId, #don't use worker ID!
trial = jd$answers$data$order_baseline,
responses = jd$answers$data$responses_baseline)
baseline.data.raw <- bind_rows(baseline.data.raw, baseline.data.subj)
}Demo check questions data frame.
#get data from ver1 participants
setwd("../exp1/mturk/production-results/")
files = list.files(pattern = '*.json')
check.data.raw <- data.frame()
for (f in 1:length(files)) {
#read in file
jd <- fromJSON(file = files[f])
check.data.subj <- data.frame(
id = jd$AssignmentId, #don't use worker ID!
order_demo = jd$answers$data$order_demo,
responses_demo = jd$answers$data$responses_demo)
check.data.raw <- bind_rows(check.data.raw, check.data.subj)
}Observation check questions data frame.
#get data from ver1 participants
setwd("../exp1/mturk/production-results/")
files = list.files(pattern = '*.json')
obs.data.raw <- data.frame()
for (f in 1:length(files)) {
#read in file
jd <- fromJSON(file = files[f])
obs.data.subj <- data.frame(
id = jd$AssignmentId, #don't use worker ID!
# order_observation = jd$answers$data$order_observation,
responses_observation = jd$answers$data$responses_observation,
responses_observation_correct = jd$answers$data$correct_observation_responses)
obs.data.raw <- bind_rows(obs.data.raw, obs.data.subj)
}Ratings data frame.
#get data from ver1 participants
setwd("../exp1/mturk/production-results/")
files = list.files(pattern = '*.json')
rate.data.raw <- data.frame()
for (f in 1:length(files)) {
#read in file
jd <- fromJSON(file = files[f])
rate.data.subj <- data.frame(
id = jd$AssignmentId, #don't use worker ID!
trial = jd$answers$data$order_trials,
rating = jd$answers$data$responses_rating)
rate.data.raw <- bind_rows(rate.data.raw, rate.data.subj)
}Model predictions.
means = read.csv("means.csv")Summarize demo check questions responses.
check.table = check.data.raw %>%
mutate(correct = order_demo == responses_demo) %>%
filter(correct == FALSE) %>%
summarize(id = id,
correct = sum(correct))
check.table## id correct
## 1 338JKRMM27CG143XB0AER8F02S9AH1 0
1 person failed one of the intro/demo check questions.
Summarize observation check questions responses.
obs.table = obs.data.raw %>%
mutate(responses_observation = factor(responses_observation, levels = c("fail","success"), labels = c("fail", "succ"))) %>%
mutate(correct = responses_observation == responses_observation_correct) %>%
filter(correct == FALSE) %>%
group_by(id) %>%
summarize(n = n(),
corect = sum(correct))
obs.table## # A tibble: 7 x 3
## id n corect
## <chr> <int> <int>
## 1 32XVDSJFP0ADOF94PIXCDV4Y7QGM2F 1 0
## 2 339ANSOTR6FM9CN3T95OLYJB7HIKIJ 10 0
## 3 3FDJT1UU75LAXOSOMPMUWPF62VCK59 1 0
## 4 3FTF2T8WLSVKTBOHETIEWGE3BZLW9R 1 0
## 5 3OS4RQUCRAS16IHJMOMAJYNT4VHFB3 1 0
## 6 3TYCR1GOTDWJO8UVED5B0TZGSUPLZK 1 0
## 7 3WT783CTPCUU36X9VMW9BS2Q7IPCBP 1 0
1 person missed 10 of these questions; 6 people missed 1.
Tidy baseline data.
base.tidy = baseline.data.raw %>%
mutate(responses = as.numeric(responses),
rate_diff = factor(trial, levels=c("baseline_5","baseline_3","baseline_1"), labels=c("5/6", "3/6", "1/6")),
amount = "baseline") %>%
select(id, rate_diff, amount, responses) %>%
filter(id != "339ANSOTR6FM9CN3T95OLYJB7HIKIJ", # remove person who failed 10 observation check questions
id != "338JKRMM27CG143XB0AER8F02S9AH1") # remove person who failed intro check question)Tidy ratings data.
rate.tidy = rate.data.raw %>%
mutate(trial = str_sub(trial, 6, str_length(trial)), # remove names
obs_diff = str_sub(trial, 1, 1),
rate_diff = str_sub(trial, 3, 3),
amount = str_sub(trial, 5, 5),
outcome = str_sub(trial, 7, str_length(trial))) %>%
mutate(id = factor(id),
obs_diff = factor(obs_diff, levels=c(5, 3, 1), labels=c("5/6","3/6","1/6")),
rate_diff = factor(rate_diff, levels=c(5, 3, 1), labels=c("5/6","3/6","1/6")),
amount = factor(amount, levels=c(1, 3), labels = c("1 Observation", "3 Observations")),
outcome = factor(outcome, levels=c("succ","fail"), labels=c("success","fail"))) %>%
filter(id != "339ANSOTR6FM9CN3T95OLYJB7HIKIJ", # remove person who failed 10 observation check questions
id != "338JKRMM27CG143XB0AER8F02S9AH1") # remove person who failed intro check question
# df for rating board difficulty 1
rate.tidy.hard = filter(rate.tidy, rate_diff == "hard")
# df for rating board difficulty 3
rate.tidy.med = filter(rate.tidy, rate_diff == "medium")
# df for rating board difficulty 5
rate.tidy.easy = filter(rate.tidy, rate_diff == "easy")
#join bsaeline data with ratings data
rate.base.tidy = full_join(rate.tidy, base.tidy)## Joining, by = c("id", "rate_diff", "amount")
## Warning: Column `id` joining factor and character vector, coercing into
## character vector
## Warning: Column `amount` joining factor and character vector, coercing into
## character vector
rate.tidy.success = filter(rate.tidy, outcome == "success")dems = demog.data.raw %>%
filter(id != "339ANSOTR6FM9CN3T95OLYJB7HIKIJ", # remove person who failed 10 observation check questions
id != "338JKRMM27CG143XB0AER8F02S9AH1") %>% # remove person who failed 1 intro check question
mutate(age = as.numeric(age)) %>%
# group_by(id) %>%
summarize(n=n(),
mean_age = mean(age),
sd_age = sd(age),
min_age = min(age),
max_age = max(age),
female = sum(gender == "female"))
dems## n mean_age sd_age min_age max_age female
## 1 98 36.19388 11.16018 21 72 31
Baseline difficulty judgments
# Histogram
ggplot(base.tidy, aes(x = responses, fill = rate_diff)) +
geom_histogram(binwidth=10) +
facet_grid(~ rate_diff) +
theme_minimal() +
ggtitle("Histogram of baseline success ratings") +
xlab("Responses (1 = Definitely Fail; 100 = Definitely Succeed")# Density plot
ggplot(base.tidy) +
geom_density(aes(x = responses, fill =rate_diff)) +
#facet_grid(~ board) +
theme_minimal() +
ggtitle("Density plots of baseline success ratings")+
xlab("Responses (1 = Definitely Fail; 100 = Definitely Succeed)") +
facet_grid (~rate_diff) +
scale_color_brewer(palette="Dark2",direction=-1)# Boxplot
ggplot(base.tidy, aes(x = rate_diff, y=responses,color=rate_diff)) +
geom_boxplot(width=.5) +
coord_cartesian(ylim=c(1,100)) +
theme_minimal() +
xlab("Board Type") +
ylab("Rating (1-100)") +
ggtitle("Boxplot of baseline success ratings")+
xlab("Responses (1 = Definitely Fail; 100 = Definitely Succeed")Ratings boxplots facetted by the difficulty of the rating board.
ggplot(rate.tidy, aes(x = obs_diff, y=as.numeric(rating),color=outcome)) +
#geom_violin() +
geom_boxplot(width=.5) +
facet_grid(amount ~ rate_diff) +
theme_minimal() +
xlab("Difficulty of Observed Board") +
ylab("Rating (1-100)") +
coord_cartesian(ylim=c(1,100)) +
# theme(legend.title = "Observed Board") +
ggtitle("Likelihood of success on each board") +
theme(panel.spacing = unit(2, "lines")) +
scale_color_brewer(palette="Dark2")Density plots.
ggplot(rate.tidy) +
geom_density(aes(x=round(as.numeric(rating),0),fill=obs_diff),alpha=.4) +
facet_grid(outcome ~ rate_diff) +
theme_minimal() +
xlab("Rating") +
ylab("Density") #ggtitle("Likelihood of success on 5/6 (easy) board") +
# scale_color_brewer(palette="Dark2")
#only success
ggplot(rate.tidy.success) +
geom_density(aes(x=as.numeric(rating),fill=amount),alpha=.4) +
facet_grid(obs_diff ~ rate_diff) +
xlab("Rating") +
ylab("Density") +
# scale_fill_manual(c("purple2","lightgoldenrod2")) +
scale_fill_brewer(type = 'div', palette = 'Set3', direction = -1) +
theme_minimal() #ggtitle("Likelihood of success on 5/6 (easy) board") +
# scale_color_brewer(palette="Dark2")Correlation plot.
means$rate_diff = factor(means$rate_diff, levels=c("easy","medium","hard"))
ggplot(means, aes( x=model_empirical, y=participants,colour =rate_diff)) +
geom_point() +
theme_minimal() +
ylab("Participants") +
xlab("Model Predictions (Empirical priors)") +
# ylim(0,1) +
# geom_abline(intercept = 0, slope = 1) +
geom_smooth(method="lm", se = FALSE) +
scale_y_continuous(breaks=seq(0, 1.2, 0.25)) +
scale_x_continuous(breaks=seq(0, 1.2, 0.25)) +
xlim(0,1) +
guides(fill=FALSE) +
scale_fill_brewer(type = 'div', palette = 'Set3', direction = -1) +
theme(text = element_text(size=20)) +
ylim(0,1)## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
ggplot(means, aes(x=model_manual, y=participants,colour =rate_diff)) +
geom_point() +
theme_minimal() +
ylab("Participants") +
xlab("Model Predictions (manual priors)") +
# ylim(0,1) +
# geom_abline(intercept = 0, slope = 1) +
geom_smooth(method="lm", se = FALSE) +
scale_y_continuous(breaks=seq(0, 1.2, 0.25)) +
scale_x_continuous(breaks=seq(0, 1.2, 0.25)) +
xlim(0,1) +
guides(fill=FALSE) +
scale_fill_brewer(type = 'div', palette = 'Set3', direction = -1) +
theme(text = element_text(size=20)) +
ylim(0,1)## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
ggplot(means, aes( x=model_hmc, y=participants,colour =rate_diff)) +
geom_point() +
theme_minimal() +
ylab("Participants") +
xlab("Model Predictions (inferred priors)") +
# ylim(0,1) +
# geom_abline(intercept = 0, slope = 1) +
geom_smooth(method="lm", se = FALSE) +
scale_y_continuous(breaks=seq(0, 1.2, 0.25)) +
scale_x_continuous(breaks=seq(0, 1.2, 0.25)) +
xlim(0,1) +
guides(fill=FALSE) +
scale_fill_brewer(type = 'div', palette = 'Set3', direction = -1) +
theme(text = element_text(size=20)) +
ylim(0,1)## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
Plots of different beta distributions.
p = seq(0,1, length=100)
# Beta distributions for empirical priors
plot(p, dbeta(p, 18.327339,1.016413), ylab="density", xlab = "rating", type ="l", col=3)
lines(p, dbeta(p, 5.300114,3.772161), col=2)
lines(p, dbeta(p, 1.135, 4.949), type ="l", col=4)# Beta distributions for manual priors
plot(p, dbeta(p, 10,5), ylab="density", xlab = "rating", type ="l", col=3)
lines(p, dbeta(p, 5,3.75), col=2)
lines(p, dbeta(p, 2,5), type ="l", col=4)# Beta distributions for inferred priors
plot(p, dbeta(p, 8,2.5), ylab="density", xlab = "rating", type ="l", col=3)
lines(p, dbeta(p, 5.5,3.5), col=2)
lines(p, dbeta(p, 4.25,9), type ="l", col=4)rate.means = rate.tidy %>%
group_by(trial, obs_diff, rate_diff, outcome, amount) %>%
summarize(mean = mean(as.numeric(rating)))
write.csv(rate.means, "participant_means.csv")Baseline ratings.
# Linear regression
summary(lm(responses ~ rate_diff, data=base.tidy))##
## Call:
## lm(formula = responses ~ rate_diff, data = base.tidy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.282 -7.562 -0.452 4.848 77.296
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.152 1.338 71.14 <2e-16 ***
## rate_diff3/6 -36.670 1.892 -19.39 <2e-16 ***
## rate_diff1/6 -78.248 1.892 -41.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.24 on 291 degrees of freedom
## Multiple R-squared: 0.8548, Adjusted R-squared: 0.8538
## F-statistic: 856.6 on 2 and 291 DF, p-value: < 2.2e-16
baseline.means = base.tidy %>%
group_by(rate_diff) %>%
multi_boot_standard(col = "responses")
baseline.means## # A tibble: 3 x 4
## # Groups: rate_diff [?]
## rate_diff ci_lower ci_upper mean
## <fct> <dbl> <dbl> <dbl>
## 1 5/6 94.2 96.1 95.2
## 2 3/6 55.3 61.6 58.5
## 3 1/6 13.9 20.3 16.9
# Bartlett's test of variance
bartlett.test(responses ~ rate_diff, data=base.tidy)##
## Bartlett test of homogeneity of variances
##
## data: responses by rate_diff
## Bartlett's K-squared = 124.91, df = 2, p-value < 2.2e-16
easy.board = filter(base.tidy, base.tidy$rate_diff == "5/6")
medium.board = filter(base.tidy, base.tidy$rate_diff == "3/6")
hard.board = filter(base.tidy, base.tidy$rate_diff == "1/6")
var(easy.board$responses)## [1] 23.41345
var(medium.board$responses)## [1] 248.4238
var(hard.board$responses)## [1] 254.1792
Test ratings.
#this is where BDA will go?
summary(lmer(as.numeric(rating) ~ obs_diff + rate_diff + amount + outcome + (1|id), rate.tidy))## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: as.numeric(rating) ~ obs_diff + rate_diff + amount + outcome +
## (1 | id)
## Data: rate.tidy
##
## REML criterion at convergence: 31144.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.1681 -0.5965 0.0036 0.6288 4.0165
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 14.42 3.797
## Residual 391.80 19.794
## Number of obs: 3528, groups: id, 98
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 81.2683 0.9615 1046.5847 84.519 <2e-16 ***
## obs_diff3/6 11.3105 0.8169 3424.0000 13.846 <2e-16 ***
## obs_diff1/6 21.7254 0.8169 3424.0000 26.596 <2e-16 ***
## rate_diff3/6 -18.5839 0.8167 3424.0000 -22.756 <2e-16 ***
## rate_diff1/6 -41.0745 0.8169 3424.0000 -50.283 <2e-16 ***
## amount3 Observations -0.6407 0.6668 3424.0000 -0.961 0.337
## outcomefail -39.8488 0.6668 3424.0000 -59.765 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ob_3/6 ob_1/6 rt_3/6 rt_1/6 amnt3O
## obs_diff3/6 -0.416
## obs_diff1/6 -0.425 0.500
## rate_dff3/6 -0.438 0.000 0.022
## rate_dff1/6 -0.417 -0.031 -0.011 0.499
## amnt3Obsrvt -0.343 -0.009 -0.017 0.003 0.020
## outcomefail -0.351 0.009 0.007 0.017 -0.007 -0.008
Correlations to model.
# Correlation with empirical priors
cor.test(means$participants, means$model_empirical)##
## Pearson's product-moment correlation
##
## data: means$participants and means$model_empirical
## t = 11.974, df = 34, p-value = 9.558e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8096904 0.9476793
## sample estimates:
## cor
## 0.8990641
# Correlation with manual priors
cor.test(means$participants, means$model_manual)##
## Pearson's product-moment correlation
##
## data: means$participants and means$model_manual
## t = 20.564, df = 34, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9263218 0.9806484
## sample estimates:
## cor
## 0.9620707
# Correlation with hmc inferred priors
cor.test(means$participants, means$model_hmc)##
## Pearson's product-moment correlation
##
## data: means$participants and means$model_hmc
## t = 25.85, df = 34, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9520828 0.9875374
## sample estimates:
## cor
## 0.9754914