library(dplyr)
library(readr)
library(Hmisc)
library(data.table) #subset rows based on partial match
library(lme4)
library(knitr)
data <- list.files(full.names = TRUE) %>%
  lapply(read_csv) %>%
  lapply(function(d) d[-1, ]) %>% # remove the first row from each file because it's empty
  bind_rows()
# Check the data dimensions to see if merging was successful
dim(data)
## [1] 51347 29
# Work only with relevant columns
dat <- data[, c("stimuli", "participant", "key_resp_3.corr", "EgD or Cont")]
# Convert variables that were read in as numerics but are really factors
dat[c('participant', 'key_resp_3.corr')] <- lapply(dat[c('participant', 'key_resp_3.corr')], factor)
# Values in the EgD/Cont column were inconsistently cased (e.g., CONT vs. cont)
# Make the entries uniform by upper-casing all character columns
dat <- data.frame(lapply(dat, function(v) {
  if (is.character(v)) return(toupper(v))
  else return(v)
}))
# Get an idea of the number of participants, unique stimuli, and repetitions of each stimulus per subject
describe(dat)
## dat
##
## 4 Variables 51347 Observations
## ---------------------------------------------------------------------------
## stimuli
## n missing distinct
## 50560 787 80
##
## lowest : CJA1_1.JPG CJA1_2.JPG CJA1_3.JPG CJA1_4.JPG CJA1_5.JPG
## highest: CJB1_5.JPG CJB1_6.JPG CJB1_7.JPG CJB1_8.JPG CJB1_9.JPG
## ---------------------------------------------------------------------------
## participant
## n missing distinct
## 50560 787 79
##
## lowest : 1 2 3 4 5 , highest: 100 101 102 103 104
## ---------------------------------------------------------------------------
## key_resp_3.corr
## n missing distinct
## 25280 26067 2
##
## Value 0 1
## Frequency 8138 17142
## Proportion 0.322 0.678
## ---------------------------------------------------------------------------
## EgD.or.Cont
## n missing distinct
## 46720 4627 2
##
## Value CONT EGD
## Frequency 22400 24320
## Proportion 0.479 0.521
## ---------------------------------------------------------------------------
# Count how many rows (repetitions) each subject has per stimulus
sorted <- dat %>%
  group_by(participant, stimuli) %>%
  summarise(no_rows = length(stimuli))
## Warning: Factor `participant` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `stimuli` contains implicit NA, consider using
## `forcats::fct_explicit_na`
head(sorted)
We have 79 subjects and 80 unique stimuli.
At first glance each stimulus appears to have been seen 8 times by each subject. However, that is just how PsychoPy recorded the data: 4 of the 8 entries are NAs, so each subject actually saw each stimulus only 4 times.
It would have been nice to incorporate a learning curve into our model, but I don't have the order in which the records were created for the 8 repetitions of the same stimulus, so it has to be left out of the model.
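As a quick sanity check of how many times each subject actually responded to each stimulus, here is a minimal sketch that counts only rows with a recorded response (assuming, as above, that the empty PsychoPy rows are exactly those with a missing key_resp_3.corr):
# Count non-missing responses per participant and stimulus;
# assumes the NA rows in key_resp_3.corr are PsychoPy's placeholder rows
dat %>%
  filter(!is.na(key_resp_3.corr)) %>%
  group_by(participant, stimuli) %>%
  summarise(n_responses = n()) %>%
  head()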
# For simplicity, let's only look at stimuli that belong to category A
dat2 <- dat[dat$stimuli %like% "CJA", ] # stimulus file names start with CJA or CJB
head(dat2)
describe(dat2)
## dat2
##
## 4 Variables 25280 Observations
## ---------------------------------------------------------------------------
## stimuli
## n missing distinct
## 25280 0 40
##
## lowest : CJA1_1.JPG CJA1_2.JPG CJA1_3.JPG CJA1_4.JPG CJA1_5.JPG
## highest: CJA3_5.JPG CJA3_6.JPG CJA3_7.JPG CJA3_8.JPG CJA3_9.JPG
## ---------------------------------------------------------------------------
## participant
## n missing distinct
## 25280 0 79
##
## lowest : 1 2 3 4 5 , highest: 100 101 102 103 104
## ---------------------------------------------------------------------------
## key_resp_3.corr
## n missing distinct
## 12640 12640 2
##
## Value 0 1
## Frequency 4439 8201
## Proportion 0.351 0.649
## ---------------------------------------------------------------------------
## EgD.or.Cont
## n missing distinct
## 23360 1920 2
##
## Value CONT EGD
## Frequency 11200 12160
## Proportion 0.479 0.521
## ---------------------------------------------------------------------------
Now we are working only with Category A stimuli, and our ego-depletion vs. control split is roughly 50/50. Let's proceed to the GLMM analysis.
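A quick way to confirm that split (a minimal sketch using the condition column before it is renamed below):
# Proportion of non-missing trials in each condition
round(prop.table(table(dat2$EgD.or.Cont)), 3)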
# dummy code ego-depletion condition (Ego_depletion = 1, Control = 0)
levels(dat2$EgD.or.Cont)<-c(0,1)
colnames(dat2)[colnames(dat2)=="EgD.or.Cont"] <- "IsEgD"
# estimate the model and store results in m
m <- glmer(key_resp_3.corr ~ IsEgD +
             (1 | participant) + (1 | stimuli),
           data = dat2, family = binomial,
           control = glmerControl(optimizer = "bobyqa"),
           nAGQ = 1)
# print the model results without correlations among the fixed effects
print(m, corr = FALSE)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: key_resp_3.corr ~ IsEgD + (1 | participant) + (1 | stimuli)
## Data: dat2
## AIC BIC logLik deviance df.resid
## 13890.115 13919.577 -6941.057 13882.115 11676
## Random effects:
## Groups Name Std.Dev.
## participant (Intercept) 0.5098
## stimuli (Intercept) 0.6546
## Number of obs: 11680, groups: participant, 73; stimuli, 40
## Fixed Effects:
## (Intercept) IsEgD1
## 0.7519 -0.1009
This code took about 2 minutes to run on my Linux 16.04 machine, so don't panic if it doesn't finish right away.
# The built-in bobyqa optimizer does not cope well with this higher-dimensional problem,
# so switch optimizers in order to incorporate 'stimuli' (40 levels) as a predictor.
library(nloptr)
# Default settings for nloptr's implementation of BOBYQA
defaultControl <- list(algorithm = "NLOPT_LN_BOBYQA", xtol_rel = 1e-6, maxeval = 1e5)
# Wrapper so glmer() can use nloptr() as its optimizer
nloptwrap2 <- function(fn, par, lower, upper, control = list(), ...) {
  # fill in any control settings the caller did not supply
  for (n in names(defaultControl))
    if (is.null(control[[n]])) control[[n]] <- defaultControl[[n]]
  res <- nloptr(x0 = par, eval_f = fn, lb = lower, ub = upper, opts = control, ...)
  # return the results in the list format glmer() expects
  with(res, list(par = solution,
                 fval = objective,
                 feval = iterations,
                 conv = if (status > 0) 0 else status,
                 message = message))
}
system.time(test.model <- glmer(key_resp_3.corr ~ IsEgD + stimuli +
                                  (1 | participant) + (1 | stimuli),
                                data = dat2,
                                family = 'binomial',
                                control = glmerControl(optimizer = "nloptwrap2")))
## boundary (singular) fit: see ?isSingular
## user system elapsed
## 114.192 0.016 114.208
print(test.model, corr = FALSE)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: key_resp_3.corr ~ IsEgD + stimuli + (1 | participant) + (1 |
## stimuli)
## Data: dat2
## AIC BIC logLik deviance df.resid
## 13800.589 14117.312 -6857.295 13714.589 11637
## Random effects:
## Groups Name Std.Dev.
## participant (Intercept) 5.092e-01
## stimuli (Intercept) 6.803e-09
## Number of obs: 11680, groups: participant, 73; stimuli, 40
## Fixed Effects:
## (Intercept) IsEgD1 stimuliCJA1_2.JPG
## 1.6876 -0.1012 -0.3137
## stimuliCJA1_3.JPG stimuliCJA1_4.JPG stimuliCJA1_5.JPG
## -0.3746 -0.4141 -0.3137
## stimuliCJA1_6.JPG stimuliCJA1_7.JPG stimuliCJA1_8.JPG
## -0.2509 -0.2295 -0.2509
## stimuliCJA2_10.JPG stimuliCJA2_11.JPG stimuliCJA2_12.JPG
## -0.5472 -0.3945 -0.5838
## stimuliCJA2_13.JPG stimuliCJA2_14.JPG stimuliCJA2_15.JPG
## -0.4722 -0.6019 -0.3137
## stimuliCJA2_16.JPG stimuliCJA2_1.JPG stimuliCJA2_2.JPG
## -0.6019 -0.2509 -0.3746
## stimuliCJA2_3.JPG stimuliCJA2_4.JPG stimuliCJA2_5.JPG
## -0.5100 -0.7593 -0.6730
## stimuliCJA2_6.JPG stimuliCJA2_7.JPG stimuliCJA2_8.JPG
## -0.8264 -0.3342 -0.1859
## stimuliCJA2_9.JPG stimuliCJA3_10.JPG stimuliCJA3_11.JPG
## -0.6019 -1.4126 -1.3684
## stimuliCJA3_12.JPG stimuliCJA3_13.JPG stimuliCJA3_14.JPG
## -2.0444 -1.1589 -1.8508
## stimuliCJA3_15.JPG stimuliCJA3_16.JPG stimuliCJA3_1.JPG
## -2.1052 -1.9098 -1.9843
## stimuliCJA3_2.JPG stimuliCJA3_3.JPG stimuliCJA3_4.JPG
## -1.6461 -1.5150 -1.5734
## stimuliCJA3_5.JPG stimuliCJA3_6.JPG stimuliCJA3_7.JPG
## -1.5879 -1.7044 -1.7482
## stimuliCJA3_8.JPG stimuliCJA3_9.JPG
## -1.5588 -2.0143
## convergence code 0; 1 optimizer warnings; 0 lme4 warnings
se <- sqrt(diag(vcov(test.model)))
# table of estimates with 95% CI
tab <- as.data.frame(cbind(Est = fixef(test.model),
                           LL = fixef(test.model) - 1.96 * se,
                           UL = fixef(test.model) + 1.96 * se))
# sort by ascending estimate of the regression coefficient
kable(tab[order(tab$Est),])
| | Est | LL | UL |
|---|---|---|---|
| stimuliCJA3_15.JPG | -2.1051734 | -2.4949498 | -1.7153969 |
| stimuliCJA3_12.JPG | -2.0444008 | -2.4333259 | -1.6554757 |
| stimuliCJA3_9.JPG | -2.0142608 | -2.4028019 | -1.6257197 |
| stimuliCJA3_1.JPG | -1.9842673 | -2.3723999 | -1.5961347 |
| stimuliCJA3_16.JPG | -1.9098469 | -2.2971872 | -1.5225066 |
| stimuliCJA3_14.JPG | -1.8507923 | -2.2376389 | -1.4639457 |
| stimuliCJA3_7.JPG | -1.7481944 | -2.1344941 | -1.3618947 |
| stimuliCJA3_6.JPG | -1.7044188 | -2.0905482 | -1.3182894 |
| stimuliCJA3_2.JPG | -1.6461498 | -2.0321793 | -1.2601203 |
| stimuliCJA3_5.JPG | -1.5879145 | -1.9738404 | -1.2019886 |
| stimuliCJA3_4.JPG | -1.5733526 | -1.9593195 | -1.1873858 |
| stimuliCJA3_8.JPG | -1.5587847 | -1.9447289 | -1.1728406 |
| stimuliCJA3_3.JPG | -1.5150451 | -1.9011239 | -1.1289662 |
| stimuliCJA3_10.JPG | -1.4125895 | -1.7991097 | -1.0260693 |
| stimuliCJA3_11.JPG | -1.3684232 | -1.7551534 | -0.9816931 |
| stimuliCJA3_13.JPG | -1.1589209 | -1.5480339 | -0.7698080 |
| stimuliCJA2_6.JPG | -0.8264096 | -1.2221883 | -0.4306309 |
| stimuliCJA2_4.JPG | -0.7592594 | -1.1568841 | -0.3616347 |
| stimuliCJA2_5.JPG | -0.6730131 | -1.0731437 | -0.2728826 |
| stimuliCJA2_16.JPG | -0.6019088 | -1.0044233 | -0.1993943 |
| stimuliCJA2_9.JPG | -0.6019086 | -1.0044187 | -0.1993986 |
| stimuliCJA2_14.JPG | -0.6019077 | -1.0044933 | -0.1993222 |
| stimuliCJA2_12.JPG | -0.5838082 | -0.9869391 | -0.1806773 |
| stimuliCJA2_10.JPG | -0.5471901 | -0.9517227 | -0.1426574 |
| stimuliCJA2_3.JPG | -0.5099837 | -0.9158950 | -0.1040724 |
| stimuliCJA2_13.JPG | -0.4721549 | -0.8796712 | -0.0646385 |
| stimuliCJA1_4.JPG | -0.4141474 | -0.8240567 | -0.0042381 |
| stimuliCJA2_11.JPG | -0.3944532 | -0.8052696 | 0.0163633 |
| stimuliCJA1_3.JPG | -0.3745687 | -0.7862796 | 0.0371423 |
| stimuliCJA2_2.JPG | -0.3745674 | -0.7862275 | 0.0370927 |
| stimuliCJA2_7.JPG | -0.3342051 | -0.7477663 | 0.0793560 |
| stimuliCJA1_2.JPG | -0.3137142 | -0.7283204 | 0.1008920 |
| stimuliCJA1_5.JPG | -0.3137126 | -0.7282934 | 0.1008681 |
| stimuliCJA2_15.JPG | -0.3137126 | -0.7282854 | 0.1008601 |
| stimuliCJA2_1.JPG | -0.2509015 | -0.6687080 | 0.1669049 |
| stimuliCJA1_6.JPG | -0.2509013 | -0.6686418 | 0.1668392 |
| stimuliCJA1_8.JPG | -0.2509007 | -0.6685904 | 0.1667891 |
| stimuliCJA1_7.JPG | -0.2294964 | -0.6483248 | 0.1893320 |
| stimuliCJA2_8.JPG | -0.1859219 | -0.6070896 | 0.2352457 |
| IsEgD1 | -0.1012429 | -0.3491373 | 0.1466515 |
| (Intercept) | 1.6875543 | 1.3360443 | 2.0390643 |
Things to consider: we did not model the learning that happened across the 4 blocks for each subject; in this analysis the 4 repetitions of the same stimulus are treated as equal. I also need to back-transform the regression coefficients from the log-odds scale to probabilities, which would make interpretation simpler: the estimate would then be the change in the probability of giving a correct answer.
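As a minimal sketch of that back-transformation (using the model fitted above; plogis() is the inverse logit, exp(x) / (1 + exp(x))):
# Back-transform the fixed effects from log-odds to probabilities
fe <- fixef(test.model)
# predicted probability of a correct answer for the reference stimulus in the control condition
plogis(fe["(Intercept)"])
# predicted probability for the same stimulus under ego depletion
plogis(fe["(Intercept)"] + fe["IsEgD1"])
# the difference between the two is the change in probability attributable to depletion
plogis(fe["(Intercept)"] + fe["IsEgD1"]) - plogis(fe["(Intercept)"])
Note that on the probability scale the size of the depletion effect depends on which stimulus you condition on; the sketch above uses the reference stimulus (CJA1_1.JPG).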