load packages. to install, go to the "Tools" menu, select "Install Packages...", and make sure "Install dependencies" is checked, or use the command: install.packages("name", dependencies = TRUE)
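for this script that would be something along these lines (only needs to be run once, so it's commented out here):
#install.packages(c("readr", "tidyr", "reshape2", "lme4", "lmerTest"), dependencies = TRUE)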
library(readr)
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.4.2
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(lme4)#make sure lme4 is called before lmerTest
## Warning: package 'lme4' was built under R version 3.4.4
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.4.3
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 3.4.4
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
read in the data. you can run the line of code below, or go to the Environment pane (top right), click "Import Dataset", and go from there
study1_raw_data <- read_csv("C:/Users/dasil/Dropbox/schema_incon/study1_raw_data.csv") #change directory
## Warning: Duplicated column names deduplicated: 'COL_20trai' =>
## 'COL_20trai_1' [511], 'IMG_07trai' => 'IMG_07trai_1' [562], 'CNL_20trai' =>
## 'CNL_20trai_1' [691]
## Parsed with column specification:
## cols(
## .default = col_character(),
## ID = col_integer()
## )
## See spec(...) for full column specifications.
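since read_csv complained about duplicated column names and guessed almost everything as character, it can be worth glancing at the full column specification (as the message suggests) before moving on:
#spec(study1_raw_data) #readr's spec() prints how each column was parsed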
start munging; reshape2 makes this incredibly easy
library(reshape2)
dat <- study1_raw_data[-1, ] # drop the first row, leave original data untouched
dim(dat) # 69 rows, 724 columns
## [1] 69 724
dat_melt <- melt(dat, id.vars = c("ID", "GENDER", "AGE", "ETHNICITY")) # change data from wide to long
dim(dat_melt) # now 49680 rows and 6 columns
## [1] 49680 6
melt_reord <- dat_melt[order(dat_melt$ID),] #reorder
#View(melt_reord) #use view to take a look at any df or table to inspect things
melt_reord$variable <- as.character(melt_reord$variable) #change from factor to char
agg <- aggregate(variable ~ ID, data = melt_reord, FUN = length) #make sure our reshape went as expected
unique(agg$variable) # everyone has 720 rows of data now which is what we'd expect
## [1] 720
more munging: we need to add the factor info (congruency, sociality, novel) to the data frame, so first create a little index table
#make a little reference table, same as making a pivot table in excel
prefix <- c("COF", "IOF", "COM", "IOM", "CNF", "INF", "CNM", "INM", "IOL", "COL", "INL", "CNL")
congruency <- c("con" ,"incon", "con" ,"incon","con" ,"incon", "con" ,"incon", "incon", "con", "incon", "con" )
sociality <- c(rep("soc",8), rep("nonsoc",4))
novel <- c(rep("old", 4), rep("new", 4), rep("old", 2), rep("new", 2))
factor_table <- data.frame(prefix, congruency, sociality, novel)
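worth printing the little lookup table once to double check each prefix got the labels we intended:
#factor_table #12 rows, one per prefix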
merge the table with the data
melt_reord_colsplit <- melt_reord %>% tidyr::separate(variable, into = c('prefix', 'suffix'), sep = 3, remove = FALSE)
dat_merged <- merge(melt_reord_colsplit, factor_table, by = "prefix", all = TRUE)
all.equal(nrow(melt_reord_colsplit), nrow(dat_merged)) #make sure no rows got dropped for some reason
## [1] TRUE
dat_merge_reord <- dat_merged[order(dat_merged$ID, dat_merged$prefix, dat_merged$suffix),] #reorder merged data
dat2compare <- melt_reord_colsplit[order(melt_reord_colsplit$ID, melt_reord_colsplit$prefix, melt_reord_colsplit$suffix),] #temp data - just ensuring merge went as expected
compare_vals <- list()
for (i in 1:7){
  compare_vals[[i]] <- all.equal(dat_merge_reord[, 2:8][, i], dat2compare[, c(1:5, 7, 8)][, i]) #make sure pre and post merge values are the same and nothing weird happened
}
unlist(compare_vals) # all true
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
get an associative score by splitting by participant and condition and summing - we will need to chat about this to clarify
dat_merge_reord$response <- ifelse(dat_merge_reord$value == 1 | dat_merge_reord$value == 2, 1, 0) # I believe either 1 or 2 were the correct responses?
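a quick way to sanity check that assumption is to cross-tab the raw values against the recoded response:
#table(dat_merge_reord$value, dat_merge_reord$response, useNA = "ifany") #raw value vs 0/1 recode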
ADDED - I think this is what you are referring to?
temp <- dat_merge_reord
temp$img_bin <- grepl("img", temp$variable) # flag trials whose variable name contains "img"
temp$face_bin <- grepl("face", temp$variable) # flag trials whose variable name contains "face"
temp_sub <- temp[temp$img_bin == TRUE | temp$face_bin == TRUE, ] # keep only the img and face trials
m0 <- glmer(response ~ sociality*congruency + (1|ID), family = "binomial", control = glmerControl(optimizer = "bobyqa"), data = temp_sub)
summary(m0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: response ~ sociality * congruency + (1 | ID)
## Data: temp_sub
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 19168.2 19206.8 -9579.1 19158.2 16555
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3297 -0.6647 -0.5202 1.0687 5.0029
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.4557 0.6751
## Number of obs: 16560, groups: ID, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.08449 0.08918 -12.161 < 2e-16 ***
## socialitysoc 0.25628 0.05004 5.122 3.03e-07 ***
## congruencyincon 0.07619 0.05077 1.501 0.133
## socialitysoc:congruencyincon -0.07260 0.07047 -1.030 0.303
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scltys cngrnc
## socialitysc -0.294
## congrncyncn -0.289 0.515
## scltysc:cng 0.208 -0.710 -0.720
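the estimates above are on the log-odds scale, so exponentiating the fixed effects can make them a bit easier to talk about:
#exp(fixef(m0)) #odds scale; fixef() is from lme4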
col_split <- dat_merge_reord %>% tidyr::separate(variable, into = c('prefix2', 'suffix2'), sep = 6, remove = FALSE) #isolate trial
col_split[, c(1, 6:9, 11:13)] <- apply(col_split[, c(1, 6:9, 11:13)], 2, as.character) #change columns to character
dat_split <- split(col_split, list(col_split$ID, col_split$prefix2)) # split by ID and trial into a list of data frames
dat_out <- list()
for (i in 1:length(dat_split)){
  dat_out[[i]] <- unlist(c(sum(dat_split[[i]]$response, na.rm = TRUE), dat_split[[i]][1, c(1:14)])) #sum across face, job, trial - 3 is what we are looking for?
}
dat_out_comb <- do.call("rbind", dat_out)
dat_out_comb <- as.data.frame(dat_out_comb)
dat_out_comb_ord <- dat_out_comb[order(dat_out_comb$ID),]
dat_out_comb_ord$new_resp <- ifelse(dat_out_comb_ord$V1 == 3, 1, 0) # new_resp = 1 when the trial sum is 3 (they got everything correct), 0 if they didn't
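quick check that the recode did what we think before modeling (the per-trial sum should only run 0 through 3 if each trial really has three responses):
#table(dat_out_comb_ord$V1, dat_out_comb_ord$new_resp) #sum of correct responses vs the all-or-nothing score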
model fitting. using glmer since we are working w/ binomial data. changing the optimizer to bobyqa to aid in convergence. need to chat about how/if we can incorporate item intercepts
#temp <- xtabs( ~ congruency + sociality + ID, data = dat_out_comb_ord)
m1 <- glmer(new_resp ~ sociality*congruency + (1|ID), family = "binomial", control = glmerControl(optimizer = "bobyqa"), data = dat_out_comb_ord)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: new_resp ~ sociality * congruency + (1 | ID)
## Data: dat_out_comb_ord
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 12823.9 12862.5 -6407.0 12813.9 16555
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.7975 -0.4550 -0.3385 -0.2191 7.6935
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 0.6647 0.8153
## Number of obs: 16560, groups: ID, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.17963 0.11057 -19.713 < 2e-16 ***
## socialitysoc 0.36064 0.06467 5.577 2.45e-08 ***
## congruencyincon -0.12764 0.06981 -1.828 0.0675 .
## socialitysoc:congruencyincon 0.22746 0.09205 2.471 0.0135 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scltys cngrnc
## socialitysc -0.328
## congrncyncn -0.301 0.515
## scltysc:cng 0.227 -0.702 -0.758
dat_out_comb_ord$AGE <- as.numeric(as.character(dat_out_comb_ord$AGE)) # AGE ended up non-numeric after the reshaping, convert back to numeric
#adding in random slopes for our within-subject vars
m2 <- glmer(new_resp ~ sociality*congruency + (1 + sociality + congruency|ID), family = "binomial", control = glmerControl(optimizer = "bobyqa"), data = dat_out_comb_ord)
summary(m2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## new_resp ~ sociality * congruency + (1 + sociality + congruency | ID)
## Data: dat_out_comb_ord
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 12789.2 12866.4 -6384.6 12769.2 16550
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.8181 -0.4738 -0.3241 -0.2110 8.2766
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 0.89434 0.9457
## socialitysoc 0.25691 0.5069 -0.49
## congruencyincon 0.01997 0.1413 -0.28 0.13
## Number of obs: 16560, groups: ID, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.25215 0.12825 -17.560 < 2e-16 ***
## socialitysoc 0.42898 0.09587 4.475 7.65e-06 ***
## congruencyincon -0.11300 0.07948 -1.422 0.1551
## socialitysoc:congruencyincon 0.22273 0.09435 2.361 0.0182 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scltys cngrnc
## socialitysc -0.544
## congrncyncn -0.338 0.347
## scltysc:cng 0.210 -0.488 -0.707
anova(m1, m2)
## Data: dat_out_comb_ord
## Models:
## m1: new_resp ~ sociality * congruency + (1 | ID)
## m2: new_resp ~ sociality * congruency + (1 + sociality + congruency |
## m2: ID)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 5 12824 12862 -6407.0 12814
## m2 10 12789 12866 -6384.6 12769 44.699 5 1.67e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
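re: the item intercepts question above - one possible starting point (just a sketch, assuming prefix2 is the right item identifier, which we should confirm) would be a crossed random intercept for item alongside the subject intercept:
#m3 <- glmer(new_resp ~ sociality*congruency + (1|ID) + (1|prefix2), family = "binomial", control = glmerControl(optimizer = "bobyqa"), data = dat_out_comb_ord) #treating prefix2 as the item is an assumption
#summary(m3)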