Main analysis
library(tidyverse)
library(minpack.lm)
# Raw MTurk data are preprocessed in a Jupyter notebook. Preprocessing steps:
#   - extract the relevant columns
#   - organize the data into tidy form
#   - add the triangle parameters for each trial
# Notebook: https://github.com/psych251/hart2018/tree/master/code/preprocess.ipynb
data = read.csv('../data/pilotb_postprocess.csv')
data = data %>%
  mutate(deltaY = scaledY - topY) %>%  # column for y-axis estimate deviation
  filter(trial != 'practice')          # exclude practice trials
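Since the preprocessing lives in the external notebook, a quick guard right after read.csv can confirm that the postprocessed CSV contains every column the analysis below relies on. A minimal sketch; the column list is inferred from the code in this file, not from the notebook:
# hedged check: stop early if any column used in the analysis below is missing
required_cols = c('subject', 'trial', 'estY', 'scaledX', 'scaledY', 'topY',
                  'leftX', 'rightX', 'sideLen', 'baseLen', 'baseAngle')
missing_cols = setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
  stop('missing columns in postprocessed data: ', paste(missing_cols, collapse = ', '))
}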
exclude_subject = function(s) {
  x = data[data$subject == s, ]
  # exclusion 1: participants who failed to complete all 150 trials will be excluded,
  # as will participants with any missing y-axis estimate
  if (nrow(x) != 150) { return(TRUE) }
  if (sum(is.na(x$estY)) > 0) { return(TRUE) }
  # exclusion 2: participants whose y-axis estimate fell below the triangle base (y = 0)
  # for more than 20% of the trials will be excluded
  if ((sum(x$estY < 0) / length(x$estY)) > 0.2) { return(TRUE) }
  # exclusion 3: participants whose x-axis estimate fell to the left of the left vertex's
  # x-coordinate or to the right of the right vertex's x-coordinate for more than 20% of
  # the trials will be excluded
  left = sum(x$scaledX < x$leftX)
  right = sum(x$scaledX > x$rightX)
  if ((left + right) / length(x$scaledX) > 0.2) { return(TRUE) }
  return(FALSE)
}
subjects = unique(data$subject)
exclude = numeric()
for (s in subjects) {
  if (exclude_subject(s)) { exclude = c(exclude, s) }
}
data = data %>% filter(! subject %in% exclude)
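The same pass can be written without the explicit loop and accumulator; an equivalent one-line alternative:
# equivalent: keep the subject IDs for which exclude_subject() returns TRUE
exclude = subjects[sapply(subjects, exclude_subject)]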
Non-linear regression: y-axis deviation as a power function of side length
Each subject's mean y-axis deviation is fit as deltaY = a * sideLen^b, and the fitted exponents b are compared against 1; b = 1 would mean the bias grows linearly with side length.
delta = data %>%
  group_by(subject, sideLen) %>%
  summarize(meanDeltaY = mean(deltaY))  # per-subject mean y-axis deviation at each side length
subjects = unique(delta$subject)
bs = numeric()
for (s in subjects) {
  x = delta[delta$subject == s, ]
  # per-subject power-law fit deltaY = a * sideLen^b
  # (the reversed model sideLen ~ a * meanDeltaY^b is undefined when meanDeltaY < 0,
  #  because a negative base raised to a non-integer power is NaN)
  fit = nlsLM(meanDeltaY ~ a * sideLen^b, data = x, start = list(a = 1, b = 1))
  bs <- c(bs, coef(fit)['b'])
}
mean(bs)
sd(bs)
(1 - mean(bs)) / sd(bs)  # standardized distance of the mean exponent from 1
t.test(bs, mu = 1)       # is the exponent reliably different from 1?
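With only a handful of (sideLen, meanDeltaY) points per subject, nlsLM can occasionally fail to converge for an individual subject. A hedged variant of the fitting loop above that skips such subjects instead of stopping the script:
# sketch: same per-subject power-law fit, but convergence failures are skipped
bs = numeric()
for (s in subjects) {
  x = delta[delta$subject == s, ]
  fit = tryCatch(nlsLM(meanDeltaY ~ a * sideLen^b, data = x, start = list(a = 1, b = 1)),
                 error = function(e) NULL)
  if (!is.null(fit)) { bs <- c(bs, coef(fit)['b']) }
}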
Non-linear regression: side length ~ SD of y-axis estimates
sigma = data %>%
  group_by(subject, sideLen) %>%
  summarize(estYstd = sd(estY))  # per-subject SD of the y-axis estimates at each side length
subjects = unique(sigma$subject)
bs = numeric()
for (s in subjects) {
  x = sigma[sigma$subject == s, ]
  fit = nlsLM(sideLen ~ a * estYstd^b, data = x, start = list(a = 1, b = 1))
  bs <- c(bs, coef(fit)['b'])
}
mean(bs)
## [1] 0.05866644
sd(bs)
## [1] 0.8671245
(1-mean(bs))/sd(bs)
## [1] 1.085581
t.test(bs, mu=1)
##
## One Sample t-test
##
## data: bs
## t = -2.1712, df = 3, p-value = 0.1183
## alternative hypothesis: true mean is not equal to 1
## 95 percent confidence interval:
## -1.321122 1.438455
## sample estimates:
## mean of x
## 0.05866644
Plot 1: mean y-axis deviation (bias) by side length and base angle
plotdata1 = data %>%
  group_by(subject, baseAngle, sideLen) %>%
  summarize(meanDeltaY = mean(deltaY)) %>%   # within-subject mean y-axis deviation
  group_by(baseAngle, sideLen) %>%
  summarize(stdDeltaY = sd(meanDeltaY),
            meanDeltaY = mean(meanDeltaY))   # across-subject mean and SD of the deviation
plotdata1$baseAngle = as.factor(plotdata1$baseAngle)  # discrete base angle for coloring
ggplot(plotdata1, aes(x = sideLen, y = meanDeltaY, color = baseAngle)) +
  geom_point() +
  geom_errorbar(aes(ymin = meanDeltaY - stdDeltaY/2, ymax = meanDeltaY + stdDeltaY/2)) +
  labs(x = 'L, triangle side length', y = 'δ, bias', color = 'Base Angle')
Plot 2: SD of y-axis estimates by side length and base length
plotdata2 = data %>%
  group_by(subject, baseLen, sideLen) %>%
  summarize(sigmaY = sd(scaledY)) %>%        # within-subject SD of the y-axis estimates
  group_by(baseLen, sideLen) %>%
  summarize(stdSigmaY = sd(sigmaY),
            meanSigmaY = mean(sigmaY))       # across-subject mean and SD of those SDs
plotdata2$baseLen = as.factor(plotdata2$baseLen)  # discrete base length for coloring
ggplot(plotdata2, aes(x = sideLen, y = meanSigmaY, color = baseLen)) +
  geom_point() +
  geom_errorbar(aes(ymin = meanSigmaY - stdSigmaY/2, ymax = meanSigmaY + stdSigmaY/2)) +
  labs(x = 'L, triangle side length', y = 'σ, std', color = 'Base Length')
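If the figures need to be saved for the write-up, ggsave writes the most recently drawn plot to disk. A minimal sketch; the file name is hypothetical:
# sketch: write the last-rendered ggplot to a file; call after each plot as needed
ggsave('plot2_sigma_by_side_length.png', width = 6, height = 4)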