Preliminaries

rm(list=ls())
library(lsr)
library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## 
## Attaching package: 'tidyr'
## 
## The following object is masked from 'package:Matrix':
## 
##     expand
library(rjson)
## Warning: package 'rjson' was built under R version 3.1.2
library(RSQLite)
## Loading required package: DBI
library(stringr)
library(ggplot2)
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.1.2
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
## Warning: package 'Formula' was built under R version 3.1.2
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     combine, src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
mean.na.rm <- function(x) { mean(x,na.rm=T) }
sum.na.rm <- function(x) { sum(x,na.rm=T) }
stderr <- function(x) sqrt(var(x)/length(x))

Read data.

# Read data ---------------------------------------------------------------

con = dbConnect(SQLite(),dbname = "many-dax-jspsych-psiturk-experiment_1_13_15/participants.db");
df.complete = dbReadTable(con,"turkdemo") #change the name of the database here (mine was called "almost")
dbDisconnect(con)
## [1] TRUE
#filter out incompletes (using dplyr methods)
df.complete = subset(df.complete,status %in% c(3,4)) 

nrow(df.complete) #includes alll subjects ever plus all debug attempts!
## [1] 422
#filter to a particular day (if I haven't set codeversions). OR together multiple days if needed
df.complete$currentVersion1 = str_detect(df.complete$beginhit, "2015-01-13")
df.complete$currentVersion2 = str_detect(df.complete$beginhit, "2015-01-14")
df.complete$currentVersion3 = str_detect(df.complete$beginhit, "2015-01-15")
df.complete$currentVersion4 = str_detect(df.complete$beginhit, "2015-01-16")


## include all participants
df.complete <- filter(df.complete, 
                      currentVersion1 | currentVersion2 | 
                        currentVersion3 | currentVersion4)


nrow(df.complete)
## [1] 307
#filter out 'debug' participants!
df.complete = filter(df.complete, !str_detect(df.complete$workerid,"debug"))
nrow(df.complete)
## [1] 302

Structure data.

#Note: Compile in wide form: 1 row/participant; each trial gets a series of column names, formatted XYFIELD_#
#Also, no extra underscores in the column names, this breaks wideToLong
#df.wide = data.frame(NULL)
df.wide = data.frame(matrix(nrow=nrow(df.complete),ncol=4))
colnames(df.wide) = c("participant","workerId","browser","beginhit") #will dynamically add columns from datastring below

for (i in 1:nrow(df.wide)){
  if (!is.na(df.complete$datastring[i])){
    a = fromJSON(df.complete$datastring[i])
    mylength = length(a$data)
  } else{
    a = data.frame(NULL)
    mylength = 0
  }
  print(mylength)
  if (mylength==27){
    df.wide$participant[i] = i
    df.wide$workerId[i] = a$workerId
    df.wide$browser[i] = df.complete$browser[i]
    df.wide$beginhit[i] = df.complete$beginhit[i]
    #cycle through all the trials, but only record where isTestTrial = 1
    for (j in 1:mylength){
      if(a$data[[j]]$trialdata$isTestTrial == "1"){
        df.wide[[paste("rt_",j, sep="")]][i] = a$data[[j]]$trialdata$rt
        df.wide[[paste("keypress_",j, sep="")]][i] = a$data[[j]]$trialdata$key_press
        df.wide[[paste("stimCondition_",j, sep="")]][i] = a$data[[j]]$trialdata$stimCondition
        df.wide[[paste("stimName_",j, sep="")]][i] = a$data[[j]]$trialdata$stimName
        df.wide[[paste("exposurePath_",j, sep="")]][i] = a$data[[j]]$trialdata$exposure_path
        df.wide[[paste("exposureManner_",j, sep="")]][i] = a$data[[j]]$trialdata$exposure_manner
        df.wide[[paste("condition_",j, sep="")]][i] = a$data[[j]]$trialdata$condition_name
      } #Else just don't make any columns right now!!!
    }
  }

  #And grab the info we need from the last 'trial' (feedback)
  if (is.null(a$data[[mylength-1]]$trialdata$responses)){df.wide$feedback[i] = "none"
  }else{
    df.wide$feedback[i] = a$data[[mylength-1]]$trialdata$responses
  }
} #End of this participant
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 36
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 30
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 29
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 60
## [1] 27
## [1] 27
## [1] 31
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 30
## [1] 29
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 28
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 54
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 26
## [1] 27
## [1] 27
## [1] 29
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 54
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 31
## [1] 27
## [1] 27
## [1] 27
## [1] 28
## [1] 27
## [1] 0
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 52
## [1] 27
## [1] 27
## [1] 52
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 54
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 33
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 43
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 28
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 29
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 27
## [1] 30
## [1] 27
## [1] 27
## [1] 27
## [1] 29
## [1] 27
#Notes: Something up with 1/16/15 subj 71's datastring:it's missing - recorded at end, so presumably lost before that?  Add a check for null datastrings

#Weird behavior! I got those wrong-lenght participants to be assigned a participant no of NA, which is something, anyway.
#Lost 6 people to this.
nrow(df.wide)
## [1] 302
df.wide = df.wide[!is.na(df.wide$participant),]
nrow(df.wide)
## [1] 278
#OOPS from 1/15/15: I didn't have the first trial (set to show the prototype movie) save the right variables, so record them
#here

df.wide$exposurePath_5 =df.wide$exposurePath_6
df.wide$exposureManner_5 =df.wide$exposureManner_6
df.wide$condition_5 =df.wide$condition_6
  

#Reformat into long form!
df.long = wideToLong(subset(df.wide,select=-feedback),within="trial")

#create factors
df.long = mutate(df.long, participant = as.numeric(participant),
          trial = as.numeric(as.character(trial)),
          rt = as.numeric(as.character(rt)),
          keypress = as.numeric(as.character(keypress))-48, #transform keycodes to numerals!
          stimCondition = factor(stimCondition,levels=c("NoChange","BothChange", "PathChange","MannerChange")),
          condition = factor(condition, levels=c("Noun","Verb")))

df.long = df.long[order(df.long$participant,df.long$trial),]

Analyze data

Response variable histograms. RTs go a little long, maybe clip these at 16s?

qplot(keypress, facets=stimCondition~condition,
      data=df.long)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-4

qplot(rt, facets=stimCondition~condition,
      data=df.long) + geom_vline(xintercept=16000, lty=2, col="red")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-4

Basic summary. This is the first finding, that N/V construal changes the weighting on manner/path generalization. I think this is kinda cool, actually.

ms <- df.long %>%
  filter(rt < 16000) %>%
  group_by(stimCondition, condition, workerId) %>%
  summarise(resp = mean(keypress)) %>%
  group_by(stimCondition, condition, add=FALSE) %>%
  summarise(ci = stderr(resp)*1.96,
            resp = mean(resp))
  
ms$stimCondition <- factor(ms$stimCondition, 
                           levels=c("NoChange","MannerChange","PathChange", "Bothchange"))

qplot(stimCondition, resp, fill=condition, 
      position="dodge", geom="bar", stat="identity",
      data=ms) + 
  geom_linerange(aes(ymin=resp - ci, ymax = resp + ci), 
                 position = position_dodge(width=.9))

plot of chunk unnamed-chunk-5

Change score. A different way of capturing this.

changescore <- df.long %>%
  filter(rt < 16000) %>%
  group_by(stimCondition, condition, workerId) %>%
  summarise(resp = mean(keypress)) %>%
  ungroup() %>%
  spread(stimCondition, resp) %>%
  mutate(mp.diff = abs(MannerChange - PathChange))

with(changescore, t.test(mp.diff[condition=="Verb"],
                         mp.diff[condition=="Noun"]))     
## 
##  Welch Two Sample t-test
## 
## data:  mp.diff[condition == "Verb"] and mp.diff[condition == "Noun"]
## t = 0.7546, df = 273.2, p-value = 0.4511
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2023  0.4538
## sample estimates:
## mean of x mean of y 
##     2.044     1.918

Try linear mixed effect model. Not much doing.

mod <- lmer(keypress ~ stimCondition * condition + (stimCondition | workerId), 
     data=df.long)
summary(mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: keypress ~ stimCondition * condition + (stimCondition | workerId)
##    Data: df.long
## 
## REML criterion at convergence: 19630
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.683 -0.484  0.018  0.392  4.633 
## 
## Random effects:
##  Groups   Name                      Variance Std.Dev. Corr             
##  workerId (Intercept)               0.432    0.657                     
##           stimConditionBothChange   1.445    1.202    -0.79            
##           stimConditionPathChange   2.018    1.421    -0.35  0.47      
##           stimConditionMannerChange 2.326    1.525    -0.31  0.60 -0.16
##  Residual                           1.262    1.123                     
## Number of obs: 5838, groups:  workerId, 276
## 
## Fixed effects:
##                                         Estimate Std. Error t value
## (Intercept)                               6.4224     0.0686    93.6
## stimConditionBothChange                  -4.5966     0.1179   -39.0
## stimConditionPathChange                  -2.9926     0.1336   -22.4
## stimConditionMannerChange                -3.2484     0.1411   -23.0
## conditionVerb                             0.1078     0.0951     1.1
## stimConditionBothChange:conditionVerb    -0.1245     0.1630    -0.8
## stimConditionPathChange:conditionVerb    -0.5604     0.1833    -3.1
## stimConditionMannerChange:conditionVerb   0.3583     0.1930     1.9
## 
## Correlation of Fixed Effects:
##             (Intr) stmCBC stmCPC stmCMC cndtnV sCBC:V sCPC:V
## stmCndtnBtC -0.758                                          
## stmCndtnPtC -0.430  0.472                                   
## stmCndtnMnC -0.395  0.570 -0.041                            
## conditinVrb -0.712  0.538  0.307  0.282                     
## stmCndtBC:V  0.540 -0.709 -0.336 -0.400 -0.759              
## stmCndtPC:V  0.310 -0.338 -0.705  0.020 -0.436  0.476       
## stmCndtMC:V  0.286 -0.404  0.020 -0.702 -0.402  0.569 -0.029

Plot manner vs. path scores:

qplot(MannerChange, PathChange, facets=.~condition, 
      data=changescore) + 
  geom_smooth(method="lm") + 
  xlim(c(1,7)) + ylim(c(1,7))
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-8

Quantify and test this. Could do it both as a lmer and as an lm over means.

summary(lm(MannerChange ~ PathChange * condition, data=changescore))
## 
## Call:
## lm(formula = MannerChange ~ PathChange * condition, data = changescore)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.391 -1.137 -0.080  0.977  3.923 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3.822      0.326   11.74   <2e-16 ***
## PathChange                 -0.196      0.088   -2.23    0.027 *  
## conditionVerb               0.945      0.446    2.12    0.035 *  
## PathChange:conditionVerb   -0.180      0.127   -1.42    0.158    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.51 on 272 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.0951, Adjusted R-squared:  0.0851 
## F-statistic: 9.53 on 3 and 272 DF,  p-value: 5.25e-06

Super-Exploratory

Do the same analyses, but filtering for participants that didn’t mark NoChange as a 6 or 7.

bad.participants <- df.long %>% 
  group_by(stimCondition, condition, workerId) %>%
  summarise(resp = mean(keypress)) %>% 
  filter(resp < 6 & stimCondition == "NoChange") %>% 
  ungroup() %>%
  select(workerId)

df.filtered <- df.long %>% filter(!(workerId %in% bad.participants$workerId))
ms <- df.filtered %>%
  filter(rt < 16000) %>%
  group_by(stimCondition, condition, workerId) %>%
  summarise(resp = mean(keypress)) %>%
  group_by(stimCondition, condition, add=FALSE) %>%
  summarise(ci = stderr(resp)*1.96,
            resp = mean(resp))
  
ms$stimCondition <- factor(ms$stimCondition, 
                           levels=c("NoChange","MannerChange","PathChange", "Bothchange"))

qplot(stimCondition, resp, fill=condition, 
      position="dodge", geom="bar", stat="identity",
      data=ms) + 
  geom_linerange(aes(ymin=resp - ci, ymax = resp + ci), 
                 position = position_dodge(width=.9))

plot of chunk unnamed-chunk-11

Change score. A different way of capturing this.

changescore <- df.filtered %>%
  filter(rt < 16000) %>%
  group_by(stimCondition, condition, workerId) %>%
  summarise(resp = mean(keypress)) %>%
  ungroup() %>%
  spread(stimCondition, resp) %>%
  mutate(mp.diff = abs(MannerChange - PathChange))

with(changescore, t.test(mp.diff[condition=="Verb"],
                         mp.diff[condition=="Noun"]))     
## 
##  Welch Two Sample t-test
## 
## data:  mp.diff[condition == "Verb"] and mp.diff[condition == "Noun"]
## t = 0.5054, df = 224.9, p-value = 0.6138
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2742  0.4633
## sample estimates:
## mean of x mean of y 
##     2.158     2.063

Try linear mixed effect model. Not much doing.

mod <- lmer(keypress ~ stimCondition * condition + (stimCondition | workerId), 
     data=df.filtered)
summary(mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: keypress ~ stimCondition * condition + (stimCondition | workerId)
##    Data: df.filtered
## 
## REML criterion at convergence: 15752
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -5.418 -0.563  0.087  0.315  4.901 
## 
## Random effects:
##  Groups   Name                      Variance Std.Dev. Corr             
##  workerId (Intercept)               0.00942  0.0971                    
##           stimConditionBothChange   0.43896  0.6625   -0.15            
##           stimConditionPathChange   1.74435  1.3207    0.54  0.09      
##           stimConditionMannerChange 2.25998  1.5033    0.20  0.55 -0.43
##  Residual                           1.15169  1.0732                    
## Number of obs: 4872, groups:  workerId, 232
## 
## Fixed effects:
##                                         Estimate Std. Error t value
## (Intercept)                               6.7365     0.0438   153.8
## stimConditionBothChange                  -4.9937     0.0906   -55.1
## stimConditionPathChange                  -3.3213     0.1436   -23.1
## stimConditionMannerChange                -3.5156     0.1598   -22.0
## conditionVerb                             0.0601     0.0592     1.0
## stimConditionBothChange:conditionVerb    -0.0943     0.1224    -0.8
## stimConditionPathChange:conditionVerb    -0.4548     0.1942    -2.3
## stimConditionMannerChange:conditionVerb   0.3772     0.2160     1.7
## 
## Correlation of Fixed Effects:
##             (Intr) stmCBC stmCPC stmCMC cndtnV sCBC:V sCPC:V
## stmCndtnBtC -0.484                                          
## stmCndtnPtC -0.185  0.198                                   
## stmCndtnMnC -0.221  0.490 -0.272                            
## conditinVrb -0.740  0.358  0.137  0.163                     
## stmCndtBC:V  0.358 -0.740 -0.147 -0.362 -0.484              
## stmCndtPC:V  0.137 -0.147 -0.740  0.201 -0.185  0.198       
## stmCndtMC:V  0.163 -0.362  0.201 -0.740 -0.221  0.490 -0.272
anova(mod)
## Analysis of Variance Table
##                         Df Sum Sq Mean Sq F value
## stimCondition            3   8409    2803 2433.72
## condition                1      0       0    0.30
## stimCondition:condition  3      9       3    2.75

Plot manner vs. path scores:

qplot(MannerChange, PathChange, facets=.~condition, 
      data=changescore) + 
  geom_smooth(method="lm") + 
  xlim(c(1,7)) + ylim(c(1,7))
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-14

summary(lm(MannerChange ~ PathChange * condition, data=changescore))
## 
## Call:
## lm(formula = MannerChange ~ PathChange * condition, data = changescore)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.404 -1.078 -0.146  0.967  3.892 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                4.2426     0.3719   11.41   <2e-16 ***
## PathChange                -0.2984     0.0996   -3.00    0.003 ** 
## conditionVerb              0.6612     0.4909    1.35    0.179    
## PathChange:conditionVerb  -0.1184     0.1382   -0.86    0.392    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.53 on 227 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.124,  Adjusted R-squared:  0.113 
## F-statistic: 10.7 on 3 and 227 DF,  p-value: 1.25e-06