Main confirmatory analysis
Note that following the original paper, the error bars in the plots indicate the standard deviation of the measures across subjects.
Load data and exclude subjects:
library(tidyverse)
library(minpack.lm)
# The raw MTurk data were preprocessed in a Jupyter notebook.
# Preprocessing steps:
# - extract the relevant columns
# - reshape the data into tidy form
# - add the triangle parameters for each trial
# Link: https://github.com/psych251/hart2018/tree/master/code/preprocess.ipynb
# Read the preprocessed trial-level data.
data <- read.csv('../data/final_postprocess_corrected.csv')

# Keep only non-practice trials and add deltaY, the signed deviation of the
# scaled y-axis estimate from the topY value of the same trial.
data <- data %>%
  filter(trial != 'practice') %>%
  mutate(deltaY = scaledY - topY)
# Decide whether one subject must be dropped from the analysis.
#
# Reads the global data frame `data` and looks only at the rows whose
# `subject` equals `s`.
#
# s: a subject identifier as stored in data$subject.
# Returns TRUE when any exclusion criterion is met, FALSE otherwise.
exclude_subject <- function(s) {
  trials <- data[data$subject == s, ]
  required_trials <- 150
  max_bad_fraction <- 0.2

  # Exclusion 1: the subject did not complete all 150 trials
  # (wrong number of rows, or any missing y estimate).
  if (nrow(trials) != required_trials || any(is.na(trials$estY))) {
    return(TRUE)
  }

  # Exclusion 2: the y estimate fell below the triangle base (y = 0)
  # on more than 20% of the trials.
  n_below_base <- sum(trials$estY < 0)
  if (n_below_base / length(trials$estY) > max_bad_fraction) {
    return(TRUE)
  }

  # Exclusion 3: the x estimate fell left of the left vertex or right of the
  # right vertex on more than 20% of the trials.
  n_left_of_triangle <- sum(trials$scaledX < trials$leftX)
  n_right_of_triangle <- sum(trials$scaledX > trials$rightX)
  n_outside <- n_left_of_triangle + n_right_of_triangle
  if (n_outside / length(trials$scaledX) > max_bad_fraction) {
    return(TRUE)
  }

  FALSE
}
# Screen every subject with exclude_subject() and collect the identifiers of
# those that meet an exclusion criterion.
subjects <- unique(data$subject)
exclude <- numeric()
for (s in subjects) {
  if (!exclude_subject(s)) {
    next
  }
  exclude <- append(exclude, s)
}

# Drop all trials that belong to an excluded subject.
data <- filter(data, !(subject %in% exclude))
Non-linear regression: $\delta \sim a_1 L^{b_1}$
# Mean y-axis deviation per subject and triangle side length.
# ungroup() so the summary does not stay grouped by subject while it is
# subset row-wise below.
delta <- data %>%
  group_by(subject, sideLen) %>%
  summarize(meanDeltaY = mean(deltaY)) %>%
  ungroup()

# Fit meanDeltaY = a * sideLen^b separately for each subject and keep the two
# fitted coefficients. The result vectors are preallocated and filled by index
# instead of being grown with c() on every iteration.
subjects <- unique(delta$subject)
as <- numeric(length(subjects))
b1 <- numeric(length(subjects))
for (i in seq_along(subjects)) {
  x <- delta[delta$subject == subjects[i], ]
  fit <- nlsLM(meanDeltaY ~ a * (sideLen)^b, data = x, start = list(a = 1, b = 1))
  # [[ ]] extracts the bare number, so no coefficient name is carried along.
  as[i] <- coef(fit)[['a']]
  b1[i] <- coef(fit)[['b']]
}
# Mean across subjects of the fitted coefficient a.
mean(as)
## [1] -0.2557866
# Mean across subjects of the fitted exponent b.
mean(b1)
## [1] 2.789955
# One-sample t-test of the per-subject exponents against 1
# (b = 1 would make the fitted curve linear in sideLen).
t.test(b1, mu=1)
##
## One Sample t-test
##
## data: b1
## t = 6.4629, df = 36, p-value = 1.68e-07
## alternative hypothesis: true mean is not equal to 1
## 95 percent confidence interval:
## 2.228254 3.351656
## sample estimates:
## mean of x
## 2.789955
Non-linear regression: $\sigma \sim a_2 L^{b_2}$
# Standard deviation of the scaled y estimates per subject and triangle side
# length. ungroup() so the summary does not stay grouped by subject while it
# is subset row-wise below.
sigma <- data %>%
  group_by(subject, sideLen) %>%
  summarize(estYstd = sd(scaledY)) %>%
  ungroup()

# Fit estYstd = a * sideLen^b separately for each subject and keep the two
# fitted coefficients. The result vectors are preallocated and filled by index
# instead of being grown with c() on every iteration.
subjects <- unique(sigma$subject)
as <- numeric(length(subjects))
b2 <- numeric(length(subjects))
for (i in seq_along(subjects)) {
  x <- sigma[sigma$subject == subjects[i], ]
  fit <- nlsLM(estYstd ~ a * sideLen^b, data = x, start = list(a = 1, b = 1))
  # [[ ]] extracts the bare number, so no coefficient name is carried along.
  as[i] <- coef(fit)[['a']]
  b2[i] <- coef(fit)[['b']]
}
# Mean across subjects of the fitted coefficient a.
mean(as)
## [1] 0.1524901
# Mean across subjects of the fitted exponent b.
mean(b2)
## [1] 0.5710148
# One-sample t-test of the per-subject exponents against 1
# (b = 1 would make the fitted curve linear in sideLen).
t.test(b2, mu=1)
##
## One Sample t-test
##
## data: b2
## t = -6.4501, df = 36, p-value = 1.747e-07
## alternative hypothesis: true mean is not equal to 1
## 95 percent confidence interval:
## 0.4361293 0.7059003
## sample estimates:
## mean of x
## 0.5710148
Plot 1: Mean error of y-coordinate estimation (δ) as a function of triangle side length
# Two-stage summary: first the mean y-axis deviation of each subject for every
# (baseAngle, sideLen) combination, then the mean and standard deviation of
# those subject-level means across subjects.
plotdata1 <- data %>%
  group_by(subject, baseAngle, sideLen) %>%
  summarize(subjectMeanDeltaY = mean(deltaY)) %>%
  group_by(baseAngle, sideLen) %>%
  summarize(
    stdDeltaY = sd(subjectMeanDeltaY),
    meanDeltaY = mean(subjectMeanDeltaY)
  )

# Treat the base angle as a factor so that it maps to discrete colours.
plotdata1$baseAngle <- as.factor(plotdata1$baseAngle)

# Each error bar is centred on the mean and reaches half a standard deviation
# above and below it, so its total height is one standard deviation.
ggplot(plotdata1, aes(x = sideLen, y = meanDeltaY, color = baseAngle)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = meanDeltaY - stdDeltaY / 2,
      ymax = meanDeltaY + stdDeltaY / 2
    )
  ) +
  labs(
    x = 'L, triangle side length',
    y = 'δ, bias',
    color = 'Base Angle'
  )
Plot 2: Mean standard deviation of y-coordinate estimation (σ) as a function of triangle side length
# Two-stage summary: first the standard deviation of each subject's scaled y
# estimates for every (baseLen, sideLen) combination, then the mean and
# standard deviation of those subject-level values across subjects.
plotdata2 <- data %>%
  group_by(subject, baseLen, sideLen) %>%
  summarize(subjectSigmaY = sd(scaledY)) %>%
  group_by(baseLen, sideLen) %>%
  summarize(
    stdSigmaY = sd(subjectSigmaY),
    meanSigmaY = mean(subjectSigmaY)
  )

# Treat the base length as a factor so that it maps to discrete colours.
plotdata2$baseLen <- as.factor(plotdata2$baseLen)

# Each error bar is centred on the mean and reaches half a standard deviation
# above and below it, so its total height is one standard deviation.
ggplot(plotdata2, aes(x = sideLen, y = meanSigmaY, color = baseLen)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = meanSigmaY - stdSigmaY / 2,
      ymax = meanSigmaY + stdSigmaY / 2
    )
  ) +
  labs(
    x = 'L, triangle side length',
    y = 'σ, std',
    color = 'Base Length'
  )
Original paper (Figure S4, mturk study, N=100):
Note that the delta and sigma (y-axes values) in the original paper’s analyses were calculated using pixel-space y-estimates, whereas this replication calculated these values in the scaled-space.