1 Data prep
2 Initial data plots
3 Replicating analysis
4 Verify DV’s that differ from original study
- 4.1 Effect of gender
- 4.2 Effect of ethnicity

1 Data prep

Load the data.

path <- "~/code/aucProject/data/"
files <- dir(paste0(path,"sandbox-results/"), pattern = "*.json")
d.raw <- data.frame()
d.balance <- data.frame()

for (f in files) {
  jf <- paste0(path, "sandbox-results/", f)
  jd <- fromJSON(paste(readLines(jf), collapse=""))
  answers <- jd$answers$data
  race <- paste(answers$race, collapse="+")
  id <- data.frame(workerid = jd$WorkerId,
                   race = race,
                   face_dft = as.numeric(answers$face),
                   face_rating = as.numeric(answers$rating),
                   exp_type = answers$type,
                   elapsed_ms = answers$elapsed_ms,
                   num_errors = answers$num_errors,
                   gender = answers$gender,
                   education = answers$education)
  d.raw <- bind_rows(d.raw, id)
  
  id <- data.frame(workerid = jd$WorkerId,
                   race = race,
                   exp_type = answers$type,
                   gender = answers$gender,
                   education = answers$education)
  d.balance <- bind_rows(d.balance, id)
}

message("Number of participants: ", length(unique(d.raw$workerid)))

## Number of participants: 4

Simple balance checks:

# Balance by gender:
table(d.balance$gender)

## 
## female 
##      4

# Balance by type:
table(d.balance$exp_type)

## 
##  attractive trustworthy 
##           2           2

# Balance by Face DFT's presented:
table(d.raw$face_dft)

## 
##   0  10  20  30  40  50  60  70  80  90 100 
##  12  12  12  12  12  12  12  12  12  12  12

2 Initial data plots

Quick check to see distribution of results per participant.

ggplot(d.raw, aes(x=face_dft, y=face_rating, color=factor(workerid))) +
  geom_point(stat="identity", alpha=0.5) + 
  facet_wrap(~exp_type)

Check errors and reaction times

d.raw %>%
  filter(elapsed_ms < 10000) %>%
  ggplot(aes(x=elapsed_ms)) +
    geom_histogram(bins=20) +
    facet_wrap(~num_errors)

Identify outliers

ggplot(d.raw, aes(x=exp_type, y=elapsed_ms)) +
  geom_boxplot()

ggplot(d.raw, aes(x=factor(num_errors), y=elapsed_ms)) +
  geom_boxplot()

ggplot(d.raw, aes(x=factor(face_dft), face_rating)) +
  geom_boxplot() +
  facet_wrap(~exp_type)

3 Replicating analysis

Compute within-subjects means, and errors across participants to generate Figure 2 from the paper.

ms <- d.raw %>%
  group_by(exp_type, face_dft, workerid) %>%
  summarise(mean_rating = mean(face_rating))

cis <- ms %>%
  group_by(exp_type, face_dft) %>%
  summarise(ci=ci95(mean_rating), mean_rating=mean(mean_rating))

ggplot(cis, aes(x=factor(face_dft), y=mean_rating, color=exp_type, group=exp_type)) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax=mean_rating+ci, ymin=mean_rating-ci),
                width=.25) +
  xlab('DFT %') +
  ylab('Mean Judgment')

The authors also did a multiple regression analysis which predicted the judgments using DFT, DFT-squared, experiment type and their interactions (all predictors centered).

mean_dft <- mean(ms$face_dft)
mean_face_rating <- mean(ms$mean_rating)
centered <- ms %>%
  mutate(face_rating = mean_rating - mean_face_rating,
         dft = face_dft - mean_dft)
pred <- lm(face_rating ~ exp_type + face_dft + I(face_dft ** 2) + face_dft * exp_type + I(face_dft**2) * exp_type,
           centered)
summary(pred)

## 
## Call:
## lm(formula = face_rating ~ exp_type + face_dft + I(face_dft^2) + 
##     face_dft * exp_type + I(face_dft^2) * exp_type, data = centered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4716 -0.5039  0.0979  0.5400  1.4582 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)                       -2.4248252  0.4316785  -5.617 1.91e-06
## exp_typetrustworthy                4.0780886  0.6104856   6.680 6.68e-08
## face_dft                           0.1414530  0.0200842   7.043 2.15e-08
## I(face_dft^2)                     -0.0012782  0.0001934  -6.608 8.39e-08
## exp_typetrustworthy:face_dft      -0.1750796  0.0284034  -6.164 3.39e-07
## exp_typetrustworthy:I(face_dft^2)  0.0012364  0.0002736   4.520 5.88e-05
##                                      
## (Intercept)                       ***
## exp_typetrustworthy               ***
## face_dft                          ***
## I(face_dft^2)                     ***
## exp_typetrustworthy:face_dft      ***
## exp_typetrustworthy:I(face_dft^2) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8013 on 38 degrees of freedom
## Multiple R-squared:  0.7269, Adjusted R-squared:  0.6909 
## F-statistic: 20.22 on 5 and 38 DF,  p-value: 8.545e-10

Finally, the authors computed a by-participant ANOVA with DFT as a repeated measure and experiment type as a between-subjects factor. > Not sure if I got the formula right for this…

anova <- aov(mean_rating ~ exp_type + face_dft + Error(workerid / face_dft),
             ms)
summary(anova)

## 
## Error: workerid
##          Df Sum Sq Mean Sq
## exp_type  1 0.2227  0.2227
## 
## Error: workerid:face_dft
##          Df Sum Sq Mean Sq
## face_dft  1 0.7433  0.7433
## 
## Error: Within
##           Df Sum Sq Mean Sq F value   Pr(>F)    
## exp_type   1   2.69   2.691   1.857    0.181    
## face_dft   1  29.14  29.138  20.100 6.29e-05 ***
## Residuals 39  56.54   1.450                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4 Verify DV’s that differ from original study

4.1 Effect of gender

ms.gender <- d.raw %>%
  group_by(gender, exp_type, face_dft, workerid) %>%
  summarise(mean = mean(face_rating)) %>%
  group_by(gender, exp_type, face_dft) %>%
  summarise(ci=ci95(mean), mean=mean(mean))

ggplot(ms.gender, aes(x=factor(face_dft), y=mean, color=exp_type, group=exp_type)) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax=mean+ci, ymin=mean-ci),
                width=.25) +
  xlab('DFT %') +
  ylab('Mean Judgment') +
  facet_wrap(~gender)

4.2 Effect of ethnicity

ms.race <- d.raw %>%
  group_by(race, exp_type, face_dft, workerid) %>%
  summarise(mean = mean(face_rating)) %>%
  group_by(race, exp_type, face_dft) %>%
  summarise(ci=ci95(mean), mean=mean(mean))

ggplot(ms.race, aes(x=factor(face_dft), y=mean, color=exp_type, group=exp_type)) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymax=mean+ci, ymin=mean-ci),
                width=.25) +
  xlab('DFT %') +
  ylab('Mean Judgment') +
  facet_wrap(~race)

Pilot analysis

Carolyn Au

2016-02-27