Load the data.
path <- "~/code/aucProject/data/"
files <- dir(paste0(path,"sandbox-results/"), pattern = "*.json")
d.raw <- data.frame()
d.balance <- data.frame()
for (f in files) {
jf <- paste0(path, "sandbox-results/", f)
jd <- fromJSON(paste(readLines(jf), collapse=""))
answers <- jd$answers$data
race <- paste(answers$race, collapse="+")
id <- data.frame(workerid = jd$WorkerId,
race = race,
face_dft = as.numeric(answers$face),
face_rating = as.numeric(answers$rating),
exp_type = answers$type,
elapsed_ms = answers$elapsed_ms,
num_errors = answers$num_errors,
gender = answers$gender,
education = answers$education)
d.raw <- bind_rows(d.raw, id)
id <- data.frame(workerid = jd$WorkerId,
race = race,
exp_type = answers$type,
gender = answers$gender,
education = answers$education)
d.balance <- bind_rows(d.balance, id)
}
message("Number of participants: ", length(unique(d.raw$workerid)))
## Number of participants: 4
Simple balance checks:
# Balance by gender:
table(d.balance$gender)
##
## female
## 4
# Balance by type:
table(d.balance$exp_type)
##
## attractive trustworthy
## 2 2
# Balance by Face DFT's presented:
table(d.raw$face_dft)
##
## 0 10 20 30 40 50 60 70 80 90 100
## 12 12 12 12 12 12 12 12 12 12 12
Quick check to see distribution of results per participant.
ggplot(d.raw, aes(x=face_dft, y=face_rating, color=factor(workerid))) +
geom_point(stat="identity", alpha=0.5) +
facet_wrap(~exp_type)
Check errors and reaction times
d.raw %>%
filter(elapsed_ms < 10000) %>%
ggplot(aes(x=elapsed_ms)) +
geom_histogram(bins=20) +
facet_wrap(~num_errors)
Identify outliers
ggplot(d.raw, aes(x=exp_type, y=elapsed_ms)) +
geom_boxplot()
ggplot(d.raw, aes(x=factor(num_errors), y=elapsed_ms)) +
geom_boxplot()
ggplot(d.raw, aes(x=factor(face_dft), face_rating)) +
geom_boxplot() +
facet_wrap(~exp_type)
Compute within-subjects means, and errors across participants to generate Figure 2 from the paper.
ms <- d.raw %>%
group_by(exp_type, face_dft, workerid) %>%
summarise(mean_rating = mean(face_rating))
cis <- ms %>%
group_by(exp_type, face_dft) %>%
summarise(ci=ci95(mean_rating), mean_rating=mean(mean_rating))
ggplot(cis, aes(x=factor(face_dft), y=mean_rating, color=exp_type, group=exp_type)) +
geom_point() +
geom_line() +
geom_errorbar(aes(ymax=mean_rating+ci, ymin=mean_rating-ci),
width=.25) +
xlab('DFT %') +
ylab('Mean Judgment')
The authors also did a multiple regression analysis which predicted the judgments using DFT, DFT-squared, experiment type and their interactions (all predictors centered).
mean_dft <- mean(ms$face_dft)
mean_face_rating <- mean(ms$mean_rating)
centered <- ms %>%
mutate(face_rating = mean_rating - mean_face_rating,
dft = face_dft - mean_dft)
pred <- lm(face_rating ~ exp_type + face_dft + I(face_dft ** 2) + face_dft * exp_type + I(face_dft**2) * exp_type,
centered)
summary(pred)
##
## Call:
## lm(formula = face_rating ~ exp_type + face_dft + I(face_dft^2) +
## face_dft * exp_type + I(face_dft^2) * exp_type, data = centered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4716 -0.5039 0.0979 0.5400 1.4582
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.4248252 0.4316785 -5.617 1.91e-06
## exp_typetrustworthy 4.0780886 0.6104856 6.680 6.68e-08
## face_dft 0.1414530 0.0200842 7.043 2.15e-08
## I(face_dft^2) -0.0012782 0.0001934 -6.608 8.39e-08
## exp_typetrustworthy:face_dft -0.1750796 0.0284034 -6.164 3.39e-07
## exp_typetrustworthy:I(face_dft^2) 0.0012364 0.0002736 4.520 5.88e-05
##
## (Intercept) ***
## exp_typetrustworthy ***
## face_dft ***
## I(face_dft^2) ***
## exp_typetrustworthy:face_dft ***
## exp_typetrustworthy:I(face_dft^2) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8013 on 38 degrees of freedom
## Multiple R-squared: 0.7269, Adjusted R-squared: 0.6909
## F-statistic: 20.22 on 5 and 38 DF, p-value: 8.545e-10
Finally, the authors computed a by-participant ANOVA with DFT as a repeated measure and experiment type as a between-subjects factor. > Not sure if I got the formula right for this…
anova <- aov(mean_rating ~ exp_type + face_dft + Error(workerid / face_dft),
ms)
summary(anova)
##
## Error: workerid
## Df Sum Sq Mean Sq
## exp_type 1 0.2227 0.2227
##
## Error: workerid:face_dft
## Df Sum Sq Mean Sq
## face_dft 1 0.7433 0.7433
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## exp_type 1 2.69 2.691 1.857 0.181
## face_dft 1 29.14 29.138 20.100 6.29e-05 ***
## Residuals 39 56.54 1.450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ms.gender <- d.raw %>%
group_by(gender, exp_type, face_dft, workerid) %>%
summarise(mean = mean(face_rating)) %>%
group_by(gender, exp_type, face_dft) %>%
summarise(ci=ci95(mean), mean=mean(mean))
ggplot(ms.gender, aes(x=factor(face_dft), y=mean, color=exp_type, group=exp_type)) +
geom_point() +
geom_line() +
geom_errorbar(aes(ymax=mean+ci, ymin=mean-ci),
width=.25) +
xlab('DFT %') +
ylab('Mean Judgment') +
facet_wrap(~gender)
ms.race <- d.raw %>%
group_by(race, exp_type, face_dft, workerid) %>%
summarise(mean = mean(face_rating)) %>%
group_by(race, exp_type, face_dft) %>%
summarise(ci=ci95(mean), mean=mean(mean))
ggplot(ms.race, aes(x=factor(face_dft), y=mean, color=exp_type, group=exp_type)) +
geom_point() +
geom_line() +
geom_errorbar(aes(ymax=mean+ci, ymin=mean-ci),
width=.25) +
xlab('DFT %') +
ylab('Mean Judgment') +
facet_wrap(~race)