load packages. to install, go to the "Tools" menu, select "Install Packages...", and make sure "Install dependencies" is checked, or use the command: install.packages("name", dependencies = TRUE)
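for this script that would be something along these lines (only needs to be run once, so it's commented out here):
#install.packages(c("readr", "tidyr", "reshape2", "lme4", "lmerTest"), dependencies = TRUE)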
library(readr)
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.4.2
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(lme4)#make sure lme4 is called before lmerTest
## Warning: package 'lme4' was built under R version 3.4.4
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.4.3
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 3.4.4
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
read in the data. you can run the line of code below, or go to the Environment pane (top right), click "Import Dataset", and go from there
study1_raw_data <- read_csv("C:/Users/dasil/Dropbox/schema_incon/study1_raw_data.csv") #change directory
## Warning: Duplicated column names deduplicated: 'COL_20trai' =>
## 'COL_20trai_1' [511], 'IMG_07trai' => 'IMG_07trai_1' [562], 'CNL_20trai' =>
## 'CNL_20trai_1' [691]
## Parsed with column specification:
## cols(
## .default = col_character(),
## ID = col_integer()
## )
## See spec(...) for full column specifications.
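since read_csv complained about duplicated column names and guessed almost everything as character, it can be worth glancing at the full column specification (as the message suggests) before moving on:
#spec(study1_raw_data) #readr's spec() prints how each column was parsed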
start munging; reshape2 makes this incredibly easy
library(reshape2)
dat <- study1_raw_data[-1, ] # drop the first row, leave original data untouched
dim(dat) # 69 rows, 724 columns
## [1] 69 724
dat_melt <- melt(dat, id.vars = c("ID", "GENDER", "AGE", "ETHNICITY")) # change data from wide to long
dim(dat_melt) # now 49680 rows and 6 columns
## [1] 49680 6
melt_reord <- dat_melt[order(dat_melt$ID),] #reorder
#View(melt_reord) #use view to take a look at any df or table to inspect things
melt_reord$variable <- as.character(melt_reord$variable) #change from factor to char
agg <- aggregate(variable ~ ID, data = melt_reord, FUN = length) #make sure our reshape went as expected
unique(agg$variable) # everyone has 720 rows of data now which is what we'd expect
## [1] 720
more munging: we need to add the factor info (congruency, sociality, novel) to the data frame, so first create a little index table
#make a little reference table, same as making a pivot table in excel
prefix <- c("COF", "IOF", "COM", "IOM", "CNF", "INF", "CNM", "INM", "IOL", "COL", "INL", "CNL")
congruency <- c("con" ,"incon", "con" ,"incon","con" ,"incon", "con" ,"incon", "incon", "con", "incon", "con" )
sociality <- c(rep("soc",8), rep("nonsoc",4))
novel <- c(rep("old", 4), rep("new", 4), rep("old", 2), rep("new", 2))
factor_table <- data.frame(prefix, congruency, sociality, novel)
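worth printing the little lookup table once to double check each prefix got the labels we intended:
#factor_table #12 rows, one per prefix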
merge the table with the data
melt_reord_colsplit <- melt_reord %>% tidyr::separate(variable, into = c('prefix', 'suffix'), sep = 3, remove = FALSE)
dat_merged <- merge(melt_reord_colsplit, factor_table, by = "prefix", all = TRUE)
all.equal(nrow(melt_reord_colsplit), nrow(dat_merged)) #make sure no rows got dropped for some reason
## [1] TRUE
dat_merge_reord <- dat_merged[order(dat_merged$ID, dat_merged$prefix, dat_merged$suffix),] #reorder merged data
dat2compare <- melt_reord_colsplit[order(melt_reord_colsplit$ID, melt_reord_colsplit$prefix, melt_reord_colsplit$suffix),] #temp data - just ensuring merge went as expected
compare_vals <- list()
for (i in 1:7){
  compare_vals[[i]] <- all.equal(dat_merge_reord[, 2:8][, i], dat2compare[, c(1:5, 7, 8)][, i]) #make sure pre and post merge values are the same and nothing weird happened
}
unlist(compare_vals) # all true
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
get an associative score by splitting by participant and condition and summing - we will need to chat about this to clarify
dat_merge_reord$response <- ifelse(dat_merge_reord$value == 1 | dat_merge_reord$value == 2, 1, 0) # I believe either 1 or 2 were the correct responses?
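a quick way to sanity check that assumption is to cross-tab the raw values against the recoded response:
#table(dat_merge_reord$value, dat_merge_reord$response, useNA = "ifany") #raw value vs 0/1 recode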
ADDED - I think this is what you are referring to?
temp <- dat_merge_reord
temp$img_bin <- grepl("img", temp$variable) # flag trials whose variable name contains "img"
temp$face_bin <- grepl("face", temp$variable) # flag trials whose variable name contains "face"
temp_sub <- temp[temp$img_bin == TRUE | temp$face_bin == TRUE, ] # keep only the img and face trials
m0 <- glmer(response ~ sociality*congruency + (1|ID), family = "binomial", control = glmerControl(optimizer = "bobyqa"), data = temp_sub)
summary(m0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: response ~ sociality * congruency + (1 | ID)
## Data: temp_sub
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 19168.2 19206.8 -9579.1 19158.2 16555
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3297 -0.6647 -0.5202 1.0687 5.0029
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.4557 0.6751
## Number of obs: 16560, groups: ID, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.08449 0.08918 -12.161 < 2e-16 ***
## socialitysoc 0.25628 0.05004 5.122 3.03e-07 ***
## congruencyincon 0.07619 0.05077 1.501 0.133
## socialitysoc:congruencyincon -0.07260 0.07047 -1.030 0.303
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scltys cngrnc
## socialitysc -0.294
## congrncyncn -0.289 0.515
## scltysc:cng 0.208 -0.710 -0.720
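the estimates above are on the log-odds scale, so exponentiating the fixed effects can make them a bit easier to talk about:
#exp(fixef(m0)) #odds scale; fixef() is from lme4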
col_split <- dat_merge_reord %>% tidyr::separate(variable, into = c('prefix2', 'suffix2'), sep = 6, remove = FALSE) #isolate trial
col_split[, c(1, 6:9, 11:13)] <- apply(col_split[, c(1, 6:9, 11:13)], 2, as.character) #change columns to character
dat_split <- split(col_split, list(col_split$ID, col_split$prefix2)) # split by ID and trial into a list of data frames
dat_out <- list()
for (i in 1:length(dat_split)){
  dat_out[[i]] <- unlist(c(sum(dat_split[[i]]$response, na.rm = TRUE), dat_split[[i]][1, c(1:14)])) #sum across face, job, trial - 3 is what we are looking for?
}
dat_out_comb <- do.call("rbind", dat_out)
dat_out_comb <- as.data.frame(dat_out_comb)
dat_out_comb_ord <- dat_out_comb[order(dat_out_comb$ID),]
dat_out_comb_ord$new_resp <- ifelse(dat_out_comb_ord$V1 == 3, 1, 0) # new_resp = 1 when the trial sum is 3 (they got everything correct), 0 if they didn't
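quick check that the recode did what we think before modeling (the per-trial sum should only run 0 through 3 if each trial really has three responses):
#table(dat_out_comb_ord$V1, dat_out_comb_ord$new_resp) #sum of correct responses vs the all-or-nothing score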
model fitting. using glmer since we are working w/ binomial data. changing the optimizer to bobyqa to aid in convergence. need to chat about how/if we can incorporate item intercepts
#temp <- xtabs( ~ congruency + sociality + ID, data = dat_out_comb_ord)
m1 <- glmer(new_resp ~ sociality*congruency + (1|ID), family = "binomial", control = glmerControl(optimizer = "bobyqa"), data = dat_out_comb_ord)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: new_resp ~ sociality * congruency + (1 | ID)
## Data: dat_out_comb_ord
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 12823.9 12862.5 -6407.0 12813.9 16555
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.7975 -0.4550 -0.3385 -0.2191 7.6935
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.6647 0.8153
## Number of obs: 16560, groups: ID, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.17963 0.11057 -19.713 < 2e-16 ***
## socialitysoc 0.36064 0.06467 5.577 2.45e-08 ***
## congruencyincon -0.12764 0.06981 -1.828 0.0675 .
## socialitysoc:congruencyincon 0.22746 0.09205 2.471 0.0135 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scltys cngrnc
## socialitysc -0.328
## congrncyncn -0.301 0.515
## scltysc:cng 0.227 -0.702 -0.758
dat_out_comb_ord$AGE <- as.numeric(as.character(dat_out_comb_ord$AGE)) # AGE ended up non-numeric after the reshaping, convert back to numeric
#adding in random slopes for our within-subject vars
m2 <- glmer(new_resp ~ sociality*congruency + (1 + sociality + congruency|ID), family = "binomial", control = glmerControl(optimizer = "bobyqa"), data = dat_out_comb_ord)
summary(m2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## new_resp ~ sociality * congruency + (1 + sociality + congruency | ID)
## Data: dat_out_comb_ord
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 12789.2 12866.4 -6384.6 12769.2 16550
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.8181 -0.4738 -0.3241 -0.2110 8.2766
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 0.89434 0.9457
## socialitysoc 0.25691 0.5069 -0.49
## congruencyincon 0.01997 0.1413 -0.28 0.13
## Number of obs: 16560, groups: ID, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.25215 0.12825 -17.560 < 2e-16 ***
## socialitysoc 0.42898 0.09587 4.475 7.65e-06 ***
## congruencyincon -0.11300 0.07948 -1.422 0.1551
## socialitysoc:congruencyincon 0.22273 0.09435 2.361 0.0182 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scltys cngrnc
## socialitysc -0.544
## congrncyncn -0.338 0.347
## scltysc:cng 0.210 -0.488 -0.707
anova(m1, m2)
## Data: dat_out_comb_ord
## Models:
## m1: new_resp ~ sociality * congruency + (1 | ID)
## m2: new_resp ~ sociality * congruency + (1 + sociality + congruency |
## m2: ID)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 5 12824 12862 -6407.0 12814
## m2 10 12789 12866 -6384.6 12769 44.699 5 1.67e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
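re: the item intercepts question above - one possible starting point (just a sketch, assuming prefix2 is the right item identifier, which we should confirm) would be a crossed random intercept for item alongside the subject intercept:
#m3 <- glmer(new_resp ~ sociality*congruency + (1|ID) + (1|prefix2), family = "binomial", control = glmerControl(optimizer = "bobyqa"), data = dat_out_comb_ord) #treating prefix2 as the item is an assumption
#summary(m3)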