rm(list=ls())
library(lsr)
library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rjson)
## Warning: package 'rjson' was built under R version 3.1.2
library(RSQLite)
## Loading required package: DBI
library(stringr)
library(ggplot2)
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.1.2
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
## Warning: package 'Formula' was built under R version 3.1.2
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
library(tidyr)
mean.na.rm <- function(x) { mean(x,na.rm=T) }
sum.na.rm <- function(x) { sum(x,na.rm=T) }
stderr <- function(x) sqrt(var(x)/length(x))
con = dbConnect(SQLite(),dbname = "participants.db");
df.complete = dbReadTable(con,"turkdemo") #change the name of the database here (mine was called "almost")
dbDisconnect(con)
## [1] TRUE
#filter out incompletes (using dplyr methods)
df.complete = subset(df.complete,status %in% c(3,4))
#filter to a particular day (if I haven't set codeversions). OR together multiple days if needed
df.complete$currentVersion = str_detect(df.complete$beginhit, "2014-12-11")
df.complete = subset(df.complete, currentVersion %in% TRUE)
#Note: Compile in wide form: 1 row/participant; each trial gets a series of column names, formatted XYFIELD_#
#Also, no extra underscores in the column names, this breaks wideToLong
#df.wide = data.frame(NULL)
df.wide = data.frame(matrix(nrow=nrow(df.complete),ncol=4))
colnames(df.wide) = c("participant","workerId","browser","beginhit") #will dynamically add columns from datastring below
for (i in 1:nrow(df.wide)){
a = fromJSON(df.complete$datastring[i])
#Note! Some badness is happening in finding these indices, need to fix this loop so it doesn't go beyond a's limit
mylength = length(a$data)
print(mylength)
#Weird: Some of the people (~10) have the wrong number of blocks! For now, just take the ones who are length 23, but what the hell!
if (mylength == 23){
df.wide$participant[i] = i
df.wide$workerId[i] = a$workerId
df.wide$browser[i] = df.complete$browser[i]
df.wide$beginhit[i] = df.complete$beginhit[i]
#cycle through all the trials, but only record where isTestTrial = 1
for (j in 1:mylength){
if(a$data[[j]]$trialdata$isTestTrial == "1"){
df.wide[[paste("rt_",j, sep="")]][i] = a$data[[j]]$trialdata$rt
df.wide[[paste("keypress_",j, sep="")]][i] = a$data[[j]]$trialdata$key_press
df.wide[[paste("stimcondition_",j, sep="")]][i] = a$data[[j]]$trialdata$stimcondition
df.wide[[paste("exposureManner_",j, sep="")]][i] = a$data[[j]]$trialdata$exposure_manner
df.wide[[paste("exposurePath_",j, sep="")]][i] = a$data[[j]]$trialdata$exposure_path
df.wide[[paste("exposureNumber_",j, sep="")]][i] = a$data[[j]]$trialdata$exposure_number
df.wide[[paste("condition_",j, sep="")]][i] = a$data[[j]]$trialdata$condition_name
} #Else just don't make any columns right now!!!
}
#And grab the info we need from the last 'trial' (feedback)
if (is.null(a$data[[mylength-1]]$trialdata$Q0)){df.wide$feedback[i] = "none"
}else{
df.wide$feedback[i] = a$data[[mylength-1]]$trialdata$Q0
}
}
} #End of this participant
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 45
## [1] 23
## [1] 27
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 44
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 25
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 46
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 23
## [1] 26
## [1] 23
Weird behavior! I got those wrong-lenght participants to be assigned a participant no of NA, which is something, anyway.
Lost 6 people to this.
nrow(df.wide)
## [1] 101
df.wide = df.wide[!is.na(df.wide$participant),]
nrow(df.wide)
## [1] 95
Reformat into long form!
df.long = wideToLong(subset(df.wide,select=-feedback),within="trial")
#create factors
df.long = mutate(df.long, participant = as.numeric(participant),
trial = as.numeric(as.character(trial)),
rt = as.numeric(as.character(rt)),
keypress = as.numeric(as.character(keypress))-48, #transform keycodes to numerals!
stimcondition = factor(stimcondition,levels=c("mismatch","match", "mannerchange","pathchange")),
exposureNumber = as.numeric(as.character(exposureNumber)),
condition = factor(condition, levels=c("Noun","Verb")))
df.long = df.long[order(df.long$participant,df.long$trial),]
Response variable histograms. RTs go a little long, maybe clip these at 8s?
qplot(keypress, facets=stimcondition~condition,
data=df.long)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
qplot(rt, facets=stimcondition~condition,
data=df.long)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Basic summary. This is the first finding, that N/V construal changes the weighting on manner/path generalization. I think this is kinda cool, actually.
ms <- df.long %>%
filter(rt < 8000, !grepl("debug*", df.long$workerId)) %>%
group_by(stimcondition, condition, workerId) %>%
summarise(resp = mean(keypress)) %>%
group_by(stimcondition, condition, add=FALSE) %>%
summarise(ci = stderr(resp)*1.96,
resp = mean(resp))
ms$stimcondition <- factor(ms$stimcondition,
levels=c("match","mannerchange","pathchange", "mismatch"))
qplot(stimcondition, resp, fill=condition,
position="dodge", geom="bar", stat="identity",
data=ms) +
geom_linerange(aes(ymin=resp - ci, ymax = resp + ci),
position = position_dodge(width=.9))
Change score. A different way of capturing this.
changescore <- df.long %>%
filter(rt < 8000, !grepl("debug*", df.long$workerId)) %>%
group_by(stimcondition, condition, workerId) %>%
summarise(resp = mean(keypress)) %>%
ungroup() %>%
spread(stimcondition, resp) %>%
mutate(mp.diff = abs(mannerchange - pathchange))
with(changescore, t.test(mp.diff[condition=="Verb"],
mp.diff[condition=="Noun"]))
##
## Welch Two Sample t-test
##
## data: mp.diff[condition == "Verb"] and mp.diff[condition == "Noun"]
## t = 2.119, df = 79.14, p-value = 0.0372
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.04186 1.33480
## sample estimates:
## mean of x mean of y
## 2.187 1.498
Now the individual diffs analyss.
Try linear mixed effect model. Not much doing.
mod <- lmer(keypress ~ stimcondition * condition + (stimcondition | workerId),
data=df.long)
summary(mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: keypress ~ stimcondition * condition + (stimcondition | workerId)
## Data: df.long
##
## REML criterion at convergence: 4694
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.259 -0.239 0.033 0.239 5.980
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 1.686 1.299
## stimconditionmatch 3.245 1.802 -0.82
## stimconditionmannerchange 2.150 1.466 -0.20 0.25
## stimconditionpathchange 1.377 1.174 -0.22 0.30 -0.30
## Residual 0.604 0.777
## Number of obs: 1615, groups: workerId, 95
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.092 0.194 10.80
## stimconditionmatch 4.333 0.268 16.17
## stimconditionmannerchange 1.337 0.224 5.98
## stimconditionpathchange 1.036 0.185 5.59
## conditionVerb -0.179 0.278 -0.64
## stimconditionmatch:conditionVerb 0.224 0.385 0.58
## stimconditionmannerchange:conditionVerb 0.837 0.321 2.60
## stimconditionpathchange:conditionVerb -0.291 0.266 -1.09
##
## Correlation of Fixed Effects:
## (Intr) stmcndtnmt stmcndtnmn stmcndtnp cndtnV stmcndtnmt:V
## stmcndtnmtc -0.815
## stmcndtnmnn -0.250 0.274
## stmcndtnpth -0.276 0.321 -0.184
## conditinVrb -0.696 0.567 0.174 0.192
## stmcndtnmt:V 0.567 -0.696 -0.191 -0.224 -0.815
## stmcndtnmn:V 0.174 -0.191 -0.696 0.128 -0.250 0.274
## stmcndtnp:V 0.192 -0.224 0.128 -0.696 -0.276 0.321
## stmcndtnmn:V
## stmcndtnmtc
## stmcndtnmnn
## stmcndtnpth
## conditinVrb
## stmcndtnmt:V
## stmcndtnmn:V
## stmcndtnp:V -0.184
Plot manner vs. path scores:
qplot(mannerchange, pathchange, facets=.~condition,
data=changescore) +
geom_smooth(method="lm") +
xlim(c(1,7)) + ylim(c(1,7))
Quantify and test this. Could do it both as a lmer and as an lm over means.
summary(lm(mannerchange ~ pathchange * condition, data=changescore))
##
## Call:
## lm(formula = mannerchange ~ pathchange * condition, data = changescore)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.176 -1.403 0.082 1.211 3.192
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.337 0.574 4.07 0.0001 ***
## pathchange 0.368 0.160 2.30 0.0239 *
## conditionVerb 1.849 0.778 2.38 0.0196 *
## pathchange:conditionVerb -0.405 0.235 -1.73 0.0875 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.78 on 90 degrees of freedom
## Multiple R-squared: 0.0791, Adjusted R-squared: 0.0484
## F-statistic: 2.58 on 3 and 90 DF, p-value: 0.0587