rm(list=ls())
library(lsr)
library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
##
## Attaching package: 'tidyr'
##
## The following object is masked from 'package:Matrix':
##
## expand
library(rjson)
## Warning: package 'rjson' was built under R version 3.1.2
library(RSQLite)
## Loading required package: DBI
library(stringr)
library(ggplot2)
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.1.2
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
## Warning: package 'Formula' was built under R version 3.1.2
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## combine, src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
mean.na.rm <- function(x) { mean(x,na.rm=T) }
sum.na.rm <- function(x) { sum(x,na.rm=T) }
stderr <- function(x) sqrt(var(x)/length(x))
Read data.
# Read data ---------------------------------------------------------------
con = dbConnect(SQLite(),dbname = "many-dax-jspsych-psiturk-experiment_1_13_15/participants.db");
df.complete = dbReadTable(con,"turkdemo") #change the name of the database here (mine was called "almost")
dbDisconnect(con)
## [1] TRUE
#filter out incompletes (using dplyr methods)
df.complete = subset(df.complete,status %in% c(3,4))
nrow(df.complete) #includes alll subjects ever plus all debug attempts!
## [1] 422
#filter to a particular day (if I haven't set codeversions). OR together multiple days if needed
df.complete$currentVersion1 = str_detect(df.complete$beginhit, "2015-01-13")
df.complete$currentVersion2 = str_detect(df.complete$beginhit, "2015-01-14")
df.complete$currentVersion3 = str_detect(df.complete$beginhit, "2015-01-15")
df.complete$currentVersion4 = str_detect(df.complete$beginhit, "2015-01-16")
## include all participants
df.complete <- filter(df.complete,
currentVersion1 | currentVersion2 |
currentVersion3 | currentVersion4)
nrow(df.complete)
## [1] 307
#filter out 'debug' participants!
df.complete = filter(df.complete, !str_detect(df.complete$workerid,"debug"))
nrow(df.complete)
## [1] 302
Structure data.
#Note: Compile in wide form: 1 row/participant; each trial gets a series of column names, formatted XYFIELD_#
#Also, no extra underscores in the column names, this breaks wideToLong
#df.wide = data.frame(NULL)
df.wide = data.frame(matrix(nrow=nrow(df.complete),ncol=4))
colnames(df.wide) = c("participant","workerId","browser","beginhit") #will dynamically add columns from datastring below
for (i in 1:nrow(df.wide)){
if (!is.na(df.complete$datastring[i])){
a = fromJSON(df.complete$datastring[i])
mylength = length(a$data)
} else{
a = data.frame(NULL)
mylength = 0
}
print(mylength)
if (mylength==27){
df.wide$participant[i] = i
df.wide$workerId[i] = a$workerId
df.wide$browser[i] = df.complete$browser[i]
df.wide$beginhit[i] = df.complete$beginhit[i]
#cycle through all the trials, but only record where isTestTrial = 1
for (j in 1:mylength){
if(a$data[[j]]$trialdata$isTestTrial == "1"){
df.wide[[paste("rt_",j, sep="")]][i] = a$data[[j]]$trialdata$rt
df.wide[[paste("keypress_",j, sep="")]][i] = a$data[[j]]$trialdata$key_press
df.wide[[paste("stimCondition_",j, sep="")]][i] = a$data[[j]]$trialdata$stimCondition
df.wide[[paste("stimName_",j, sep="")]][i] = a$data[[j]]$trialdata$stimName
df.wide[[paste("exposurePath_",j, sep="")]][i] = a$data[[j]]$trialdata$exposure_path
df.wide[[paste("exposureManner_",j, sep="")]][i] = a$data[[j]]$trialdata$exposure_manner
df.wide[[paste("condition_",j, sep="")]][i] = a$data[[j]]$trialdata$condition_name
} #Else just don't make any columns right now!!!
}
}
#And grab the info we need from the last 'trial' (feedback)
if (is.null(a$data[[mylength-1]]$trialdata$responses)){df.wide$feedback[i] = "none"
}else{
df.wide$feedback[i] = a$data[[mylength-1]]$trialdata$responses
}
} #End of this participant
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 36
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 30
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 29
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 60
## [1] 27
## [1] 27
## [1] 31
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 30
## [1] 29
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 28
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 54
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 26
## [1] 27
## [1] 27
## [1] 29
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 54
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 31
## [1] 27
## [1] 27
## [1] 27
## [1] 28
## [1] 27
## [1] 0
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 52
## [1] 27
## [1] 27
## [1] 52
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 54
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 33
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 43
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 28
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 29
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 30
## [1] 27
## [1] 27
## [1] 27
## [1] 29
## [1] 27
#Notes: Something up with 1/16/15 subj 71's datastring:it's missing - recorded at end, so presumably lost before that? Add a check for null datastrings
#Weird behavior! I got those wrong-lenght participants to be assigned a participant no of NA, which is something, anyway.
#Lost 6 people to this.
nrow(df.wide)
## [1] 302
df.wide = df.wide[!is.na(df.wide$participant),]
nrow(df.wide)
## [1] 278
#OOPS from 1/15/15: I didn't have the first trial (set to show the prototype movie) save the right variables, so record them
#here
df.wide$exposurePath_5 =df.wide$exposurePath_6
df.wide$exposureManner_5 =df.wide$exposureManner_6
df.wide$condition_5 =df.wide$condition_6
#Reformat into long form!
df.long = wideToLong(subset(df.wide,select=-feedback),within="trial")
#create factors
df.long = mutate(df.long, participant = as.numeric(participant),
trial = as.numeric(as.character(trial)),
rt = as.numeric(as.character(rt)),
keypress = as.numeric(as.character(keypress))-48, #transform keycodes to numerals!
stimCondition = factor(stimCondition,levels=c("NoChange","BothChange", "PathChange","MannerChange")),
condition = factor(condition, levels=c("Noun","Verb")))
df.long = df.long[order(df.long$participant,df.long$trial),]
Response variable histograms. RTs go a little long, maybe clip these at 16s?
qplot(keypress, facets=stimCondition~condition,
data=df.long)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
qplot(rt, facets=stimCondition~condition,
data=df.long) + geom_vline(xintercept=16000, lty=2, col="red")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Basic summary. This is the first finding, that N/V construal changes the weighting on manner/path generalization. I think this is kinda cool, actually.
ms <- df.long %>%
filter(rt < 16000) %>%
group_by(stimCondition, condition, workerId) %>%
summarise(resp = mean(keypress)) %>%
group_by(stimCondition, condition, add=FALSE) %>%
summarise(ci = stderr(resp)*1.96,
resp = mean(resp))
ms$stimCondition <- factor(ms$stimCondition,
levels=c("NoChange","MannerChange","PathChange", "Bothchange"))
qplot(stimCondition, resp, fill=condition,
position="dodge", geom="bar", stat="identity",
data=ms) +
geom_linerange(aes(ymin=resp - ci, ymax = resp + ci),
position = position_dodge(width=.9))
Change score. A different way of capturing this.
changescore <- df.long %>%
filter(rt < 16000) %>%
group_by(stimCondition, condition, workerId) %>%
summarise(resp = mean(keypress)) %>%
ungroup() %>%
spread(stimCondition, resp) %>%
mutate(mp.diff = abs(MannerChange - PathChange))
with(changescore, t.test(mp.diff[condition=="Verb"],
mp.diff[condition=="Noun"]))
##
## Welch Two Sample t-test
##
## data: mp.diff[condition == "Verb"] and mp.diff[condition == "Noun"]
## t = 0.7546, df = 273.2, p-value = 0.4511
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2023 0.4538
## sample estimates:
## mean of x mean of y
## 2.044 1.918
Try linear mixed effect model. Not much doing.
mod <- lmer(keypress ~ stimCondition * condition + (stimCondition | workerId),
data=df.long)
summary(mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: keypress ~ stimCondition * condition + (stimCondition | workerId)
## Data: df.long
##
## REML criterion at convergence: 19630
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.683 -0.484 0.018 0.392 4.633
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 0.432 0.657
## stimConditionBothChange 1.445 1.202 -0.79
## stimConditionPathChange 2.018 1.421 -0.35 0.47
## stimConditionMannerChange 2.326 1.525 -0.31 0.60 -0.16
## Residual 1.262 1.123
## Number of obs: 5838, groups: workerId, 276
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.4224 0.0686 93.6
## stimConditionBothChange -4.5966 0.1179 -39.0
## stimConditionPathChange -2.9926 0.1336 -22.4
## stimConditionMannerChange -3.2484 0.1411 -23.0
## conditionVerb 0.1078 0.0951 1.1
## stimConditionBothChange:conditionVerb -0.1245 0.1630 -0.8
## stimConditionPathChange:conditionVerb -0.5604 0.1833 -3.1
## stimConditionMannerChange:conditionVerb 0.3583 0.1930 1.9
##
## Correlation of Fixed Effects:
## (Intr) stmCBC stmCPC stmCMC cndtnV sCBC:V sCPC:V
## stmCndtnBtC -0.758
## stmCndtnPtC -0.430 0.472
## stmCndtnMnC -0.395 0.570 -0.041
## conditinVrb -0.712 0.538 0.307 0.282
## stmCndtBC:V 0.540 -0.709 -0.336 -0.400 -0.759
## stmCndtPC:V 0.310 -0.338 -0.705 0.020 -0.436 0.476
## stmCndtMC:V 0.286 -0.404 0.020 -0.702 -0.402 0.569 -0.029
Plot manner vs. path scores:
qplot(MannerChange, PathChange, facets=.~condition,
data=changescore) +
geom_smooth(method="lm") +
xlim(c(1,7)) + ylim(c(1,7))
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
Quantify and test this. Could do it both as a lmer and as an lm over means.
summary(lm(MannerChange ~ PathChange * condition, data=changescore))
##
## Call:
## lm(formula = MannerChange ~ PathChange * condition, data = changescore)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.391 -1.137 -0.080 0.977 3.923
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.822 0.326 11.74 <2e-16 ***
## PathChange -0.196 0.088 -2.23 0.027 *
## conditionVerb 0.945 0.446 2.12 0.035 *
## PathChange:conditionVerb -0.180 0.127 -1.42 0.158
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.51 on 272 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0951, Adjusted R-squared: 0.0851
## F-statistic: 9.53 on 3 and 272 DF, p-value: 5.25e-06
Do the same analyses, but filtering for participants that didn’t mark NoChange as a 6 or 7.
bad.participants <- df.long %>%
group_by(stimCondition, condition, workerId) %>%
summarise(resp = mean(keypress)) %>%
filter(resp < 6 & stimCondition == "NoChange") %>%
ungroup() %>%
select(workerId)
df.filtered <- df.long %>% filter(!(workerId %in% bad.participants$workerId))
ms <- df.filtered %>%
filter(rt < 16000) %>%
group_by(stimCondition, condition, workerId) %>%
summarise(resp = mean(keypress)) %>%
group_by(stimCondition, condition, add=FALSE) %>%
summarise(ci = stderr(resp)*1.96,
resp = mean(resp))
ms$stimCondition <- factor(ms$stimCondition,
levels=c("NoChange","MannerChange","PathChange", "Bothchange"))
qplot(stimCondition, resp, fill=condition,
position="dodge", geom="bar", stat="identity",
data=ms) +
geom_linerange(aes(ymin=resp - ci, ymax = resp + ci),
position = position_dodge(width=.9))
Change score. A different way of capturing this.
changescore <- df.filtered %>%
filter(rt < 16000) %>%
group_by(stimCondition, condition, workerId) %>%
summarise(resp = mean(keypress)) %>%
ungroup() %>%
spread(stimCondition, resp) %>%
mutate(mp.diff = abs(MannerChange - PathChange))
with(changescore, t.test(mp.diff[condition=="Verb"],
mp.diff[condition=="Noun"]))
##
## Welch Two Sample t-test
##
## data: mp.diff[condition == "Verb"] and mp.diff[condition == "Noun"]
## t = 0.5054, df = 224.9, p-value = 0.6138
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2742 0.4633
## sample estimates:
## mean of x mean of y
## 2.158 2.063
Try linear mixed effect model. Not much doing.
mod <- lmer(keypress ~ stimCondition * condition + (stimCondition | workerId),
data=df.filtered)
summary(mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: keypress ~ stimCondition * condition + (stimCondition | workerId)
## Data: df.filtered
##
## REML criterion at convergence: 15752
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.418 -0.563 0.087 0.315 4.901
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## workerId (Intercept) 0.00942 0.0971
## stimConditionBothChange 0.43896 0.6625 -0.15
## stimConditionPathChange 1.74435 1.3207 0.54 0.09
## stimConditionMannerChange 2.25998 1.5033 0.20 0.55 -0.43
## Residual 1.15169 1.0732
## Number of obs: 4872, groups: workerId, 232
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.7365 0.0438 153.8
## stimConditionBothChange -4.9937 0.0906 -55.1
## stimConditionPathChange -3.3213 0.1436 -23.1
## stimConditionMannerChange -3.5156 0.1598 -22.0
## conditionVerb 0.0601 0.0592 1.0
## stimConditionBothChange:conditionVerb -0.0943 0.1224 -0.8
## stimConditionPathChange:conditionVerb -0.4548 0.1942 -2.3
## stimConditionMannerChange:conditionVerb 0.3772 0.2160 1.7
##
## Correlation of Fixed Effects:
## (Intr) stmCBC stmCPC stmCMC cndtnV sCBC:V sCPC:V
## stmCndtnBtC -0.484
## stmCndtnPtC -0.185 0.198
## stmCndtnMnC -0.221 0.490 -0.272
## conditinVrb -0.740 0.358 0.137 0.163
## stmCndtBC:V 0.358 -0.740 -0.147 -0.362 -0.484
## stmCndtPC:V 0.137 -0.147 -0.740 0.201 -0.185 0.198
## stmCndtMC:V 0.163 -0.362 0.201 -0.740 -0.221 0.490 -0.272
anova(mod)
## Analysis of Variance Table
## Df Sum Sq Mean Sq F value
## stimCondition 3 8409 2803 2433.72
## condition 1 0 0 0.30
## stimCondition:condition 3 9 3 2.75
Plot manner vs. path scores:
qplot(MannerChange, PathChange, facets=.~condition,
data=changescore) +
geom_smooth(method="lm") +
xlim(c(1,7)) + ylim(c(1,7))
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
summary(lm(MannerChange ~ PathChange * condition, data=changescore))
##
## Call:
## lm(formula = MannerChange ~ PathChange * condition, data = changescore)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.404 -1.078 -0.146 0.967 3.892
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.2426 0.3719 11.41 <2e-16 ***
## PathChange -0.2984 0.0996 -3.00 0.003 **
## conditionVerb 0.6612 0.4909 1.35 0.179
## PathChange:conditionVerb -0.1184 0.1382 -0.86 0.392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.53 on 227 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.124, Adjusted R-squared: 0.113
## F-statistic: 10.7 on 3 and 227 DF, p-value: 1.25e-06