library(tidyverse)
library(zoo)
library(knitr)
library(tibble)
library(plyr)
library(dplyr)
library(data.table)
library(Hmisc)
library(grt)
library(reshape) #reshape data from long to wide
library(MASS) #backward elimination of non-significant predictors in linear models
library(ggcorrplot)
library(car) #Anova
library(emmeans) #multiple comparisons
library(Rmisc)
library(ltm) #cronbach alpha
library(kableExtra)
#need this to work on Mac
logLik.grg <- function(object, ...)
{
  val <- object$logLik
  class(val) <- "logLik"
  val
}

Temperament Data

df <- read.csv("exp_qualtrics_II_2.csv", header=T)
df<-df[, 1:74]
df[13:74]<-df[13:74] %>% mutate_all(~ifelse(is.na(.x), mean(.x, na.rm = TRUE), .x)) #fill na with col average

colnames(df)[which(names(df) == "PA.3_1")] <- "EX.3_1"
colnames(df)[which(names(df) == "EX.40_1")] <- "EC.40_1"

cols.num <- c(13:74) #find ATQ columns
df[cols.num] <- sapply(df[cols.num],as.numeric) #convert to numerics
#sapply(df, class)
indx <- grepl('R', colnames(df)) #find reverse coded items
df[,indx]<-abs(df[,indx]-8) #reverse coding
NA_indx <- grepl('NA', colnames(df)) #find Neg_Affect items
NA_sum <- rowSums(df[, NA_indx]) #total score of NA items for each participant
df$NA_score <- NA_sum/25 #append average NA scores to df
cronbach.alpha(df[,NA_indx])
## 
## Cronbach's alpha for the 'df[, NA_indx]' data-set
## 
## Items: 26
## Sample units: 245
## alpha: 0.774
EC_indx <- grepl('EC', colnames(df)) #find EC items
EC_sum <- rowSums(df[, EC_indx]) #total score of EC items for each participant
df$EC_score <- EC_sum/19 #append EC scores to df
cronbach.alpha(df[,EC_indx])
## 
## Cronbach's alpha for the 'df[, EC_indx]' data-set
## 
## Items: 17
## Sample units: 245
## alpha: 0.773
EX_indx <- grepl('EX', colnames(df)) #find EX items
EX_sum <- rowSums(df[, EX_indx]) #total score of EX items for each participant
df$EX_score <- EX_sum/17 #append EX scores to df
cronbach.alpha(df[,EX_indx])
## 
## Cronbach's alpha for the 'df[, EX_indx]' data-set
## 
## Items: 19
## Sample units: 245
## alpha: 0.733
df_temp <-df[,c(1, 3:12, 75:77)]
#generate columns that take temperament factors as low/medium/high levels for potential analyses
df_temp$EC_level <- as.factor(ifelse(df_temp$EC_score<=median(df_temp$EC_score),"low","high"))

df_temp$NA_level <- as.factor(ifelse(df_temp$NA_score<=median(df_temp$NA_score),"low","high"))

df_temp$EX_level <- as.factor(ifelse(df_temp$EX_score<=median(df_temp$EX_score),"low","high"))

Pavlovia Data

setwd( "/home/tzhu/Desktop/decisionBound_data/exp_II_analysis/completed_exp_II_data")

csv <- list.files(full.names = TRUE, pattern = '*.csv')

df_Pav <- as_tibble(rbind.fill(lapply (csv, fread)))
#subset dataset

# which(colnames(df_Pav)=="exp_Sentence")
# which(colnames(df_Pav)=="catlearn_corrAns")
cols1 <- c(1, 85:131)
df_Pav<-df_Pav[,cols1]
# which(colnames(df_Pav)=="exp_typedWord")
# which(colnames(df_Pav)=="numberWords_recalled")
# which(colnames(df_Pav)=="key_resp_20.keys")
# which(colnames(df_Pav)=="II_stim_file")
cols2 <- c(1:6,12,20:21,31:33,40:48)
df_Pav<-df_Pav[,cols2] 
#fill empty cells with NA
df_Pav <- df_Pav %>%
mutate_at(vars(colnames(.)),
        .funs = funs(ifelse(.=="", NA, as.character(.))))

#remove empty rows
df_Pav<-df_Pav[!is.na(df_Pav$exp_Sentence) | !is.na(df_Pav$exp_typedWord) | !is.na(df_Pav$key_resp_20.keys),]

RSpan

# which(colnames(df_Pav)=="sentence_num_total")
df_rspan<-df_Pav[, 1:9] #subset relevant cols
df_rspan<-df_rspan[!is.na(df_rspan$exp_Sentence) | !is.na(df_rspan$exp_typedWord),] #keep rows with either judgment or recall responses
cols4 <- c(4, 6, 8:9)
df_rspan[cols4] <- sapply(df_rspan[cols4],as.numeric)
#keep only judgment related rows
Proportion_Correct <- df_rspan %>% 
  filter(!is.na(exp_isCorrect))

#late responses on judgment are filled with rt of 4.5 
Proportion_Correct$key_resp_10.rt[is.na(Proportion_Correct$key_resp_10.rt)] <- 4.5
#keep only recalled_word related rows
recall_num <- df_rspan %>% 
  filter(!is.na(numberWords_recalled))
recall_num$number_answered<-sapply(strsplit(recall_num$exp_typedWord, " "), length)
recall_num$intrusion_error<-recall_num$number_answered - recall_num$numberWords_recalled #intrusion are the incorrect words typed

RSpan Performance data

#Proportion_Correct dataframe
df1<-aggregate(Proportion_Correct[, c(4,6)], list(Proportion_Correct$UniqueID), mean)#average for each Ss performance
#recalled number dataframe
df2<-aggregate(recall_num[, c(8,11)], list(recall_num$UniqueID), sum)
df_rspan_perf <- merge(df1,df2,by="Group.1")
names(df_rspan_perf)[1] <- "UniqueID"
df_rspan_perf<-df_rspan_perf[!df_rspan_perf$UniqueID=="9399sh",]
head(df_rspan_perf)

Merge temperament df with Rspan

df_tem_rspan<-merge(df_temp, df_rspan_perf, by = "UniqueID")
head(df_tem_rspan)

Category learning data

cols3<- c(1, 10:21)
df_catlearn<-df_Pav[, cols3] #remove irrelevant cols
df_catlearn<-df_catlearn[!is.na(df_catlearn$key_resp_20.keys),] #remove empty rows

df_catlearn$block <- rep(1:6, times=222, each = 60)
df_catlearn<- df_catlearn[, c(1, 14, 2:13)]

#describe(df_catlearn)

df_catlearn$key_resp_20.corr<-as.numeric(df_catlearn$key_resp_20.corr)
#describe(df_catlearn)
perf_table<- df_catlearn %>%
    group_by(UniqueID) %>%
    dplyr::summarize(Mean = mean(key_resp_20.corr, na.rm=TRUE))

mean(perf_table$Mean)
## [1] 0.7796851
sd(perf_table$Mean)
## [1] 0.1173764

Category Learning Strategy Fitting

df_catlearn <-(df_catlearn %>% mutate(catlearn_corrAns = case_when(
  catlearn_corrAns == "a" ~ 1,
  catlearn_corrAns == "b" ~ 2))
)

#relabel the key_resp_3.keys as 1 and 2
df_catlearn<-(df_catlearn %>% mutate(key_resp_20.keys = case_when(
  key_resp_20.keys == "a" ~ 1,
  key_resp_20.keys == "b" ~ 2))
)

cols5<-c(4, 5, 7:13)
df_catlearn[cols5] <- sapply(df_catlearn[cols5],as.numeric)
dat<-df_catlearn

#template code for combining blocks
# cols6<-c(1, 3:14)
# dat<-df_catlearn[, cols6]
# dat$block <-rep(1:6, times = 24, each=60)
# source("plot.gcjc.R")

blocks <- levels(factor(dat$block))
sbj <- levels(factor(dat$UniqueID))
models <- c("Orientation", "Length", "Information Integration", "Conjunctive Rule", "Fixed Random", "General Random")
res <- matrix(nrow=length(sbj) * length(blocks),ncol=(length(models) * 2 + 2))
res <- cbind(expand.grid(sbj,blocks), res)
colnames(res)[1:2] <- c("UniqueID","block")
pdf(file="222s_6BLOCK2_ii_individual_plots.pdf", width=12, height=9)
op <- par(mfrow = c(3, 4), pty = "m")

for(i in sbj)
{
  for(j in blocks)
  {
    data <- subset(dat, dat$block == j & dat$UniqueID == i)
    fit.ori <- glc(key_resp_20.keys ~ ori_transform, category=data$catlearn_corrAns, data=data)
    fit.length <- glc(key_resp_20.keys ~ length, category=data$catlearn_corrAns, data=data)
    fit.2d <- glc(key_resp_20.keys ~ length + ori_transform, category=data$catlearn_corrAns, data=data)
    fit.cj <- gcjc(key_resp_20.keys ~ length + ori_transform,
                   category=data$catlearn_corrAns, data=data,
                   equal.noise = TRUE, config = 2,
                   fixed = list(noise = c(FALSE, FALSE), bias = c(FALSE, FALSE)))
    fit.frg <- grg(data$catlearn_corrAns, fixed=T)
    fit.grg <- grg(data$catlearn_corrAns, fixed=F)
    AICs <- AIC(fit.ori, fit.length, fit.2d, fit.cj, fit.frg, fit.grg)
    BICs <- AIC(fit.ori, fit.length, fit.2d, fit.cj, fit.frg, fit.grg, k=log(60))
    winner <- which.min(AICs$AIC)
    winner2 <- which.min(BICs$AIC)
    res[res$UniqueID==i & res$block == j,3:ncol(res)] <- c(round(AICs$AIC,2), models[winner], round(BICs$AIC,2), models[winner2])
  ##### now plot'em ###RACHEL CHANGED 'winner' to 'winner2' to reflect BIC instead of AIC
    # col <- rep("black",4)
    # col[winner2] <- "red"
    # bg <- c("blue","red")[factor(data$key_resp_20.keys)]
    # pch <- c(21,24)[factor(data$catlearn_corrAns)]
    # title <- paste(i, ": block", j, ": ", models[winner2])
    # plot(ori_transform ~ length, data=data, xlab="Length", ylab="Orientation", bg=bg, pch=pch, xlim=c(0,300), ylim=c(0, 110), main=title)
    # abline(h=coef(fit.ori), col = col[1])
    # abline(v=coef(fit.length), col = col[2])
    # abline(coef(fit.2d), col = col[3])
    # cj.coef <- coef(fit.cj)
    # abline(h=cj.coef[["ori_transform"]], col = col[4], lty = 3)
    # abline(v=cj.coef[["length"]], col = col[4], lty = 3)
  }
}

dev.off()
## png 
##   2
res.nam <- c("ori","length","2d","cj","frg","grg","winner")
colnames(res)[3:16] <- c(paste(res.nam, "AIC", sep="."), paste(res.nam, "BIC", sep="."))

Category learning Performance by block

catlearn_perf<- dat %>%
    group_by(UniqueID, block) %>%
    dplyr::summarize(Mean = mean(key_resp_20.corr, na.rm=TRUE))

head(catlearn_perf)
wide1<-cast(catlearn_perf, UniqueID~block) 

Descriptives of Participants Strategy Use

# count the number of time each model is used by each participant
model_block <- res %>% 
  group_by(UniqueID, winner.BIC) %>%
  dplyr::summarise(Count = n())

#how many blocks for each Ss was best fitted by CR model
head(model_block[with(model_block, winner.BIC == "Information Integration" ), ])
# find the first block each S used the optimal CR strategy
first_ii<- res %>% 
  group_by(UniqueID, block, winner.BIC) %>%
  dplyr::summarise(Count = n())

a<-first_ii[first_ii$winner.BIC == "Information Integration",]
first_ii<-a %>% 
    group_by(UniqueID) %>% 
    slice(which.min(block))

head(first_ii)

Combine Temperament Rspan data with Catlearn data

#merge df_tem_rspan that has temperament scores and rspan performance with catlearn model fit descriptives
b<-merge(first_ii, df_tem_rspan, all = TRUE)
cols<-c(1:2, 5:24)
b<-b[, cols]
colnames(b)[2] <- "first_II_block"
b[,2] <- as.numeric(as.character(b[,2]))
#reshape model_block from long to wide, this df contains number of blocks best fitted by each strategy
wide<-cast(model_block, UniqueID ~ winner.BIC)
b<-merge(wide,b,all =TRUE)

complete<-setnames(b, old = c('Conjunctive Rule','Fixed Random','Information Integration', 'Length','Orientation', 'General Random'), new = c('total_CR','total_fixedRand','total_II','total_length','total_ori', 'total_genRand'))


# generate a column that keep count of the total number of blocks best fitted by rule-based models, regardless of 1d or 2d

#fill NAs with 0 in the block count for each model
complete[, c(2:7)][is.na(complete[, c(2:7)])] <- 0


complete$total_simple_rule<- complete$total_length + complete$total_ori
complete$total_Rand<-complete$total_fixedRand + complete$total_genRand

complete <- complete[,c(1, 11:16,9:10,17:28, 2:8, 29:30)]#rearrange cols

# merge with catlearn accuracy performance of each block
complete<-merge(complete, wide1, by = "UniqueID")

block_strat<-cast(res[,c(1,2,16)], UniqueID~block)
complete<-merge(complete, block_strat, by = "UniqueID")

complete<-complete[!is.na(complete$Age), ]
dt<-complete[,31:36]
#complete$num_low_perf<-rowSums(dt < 0.60)
complete$avg_cat_perf <- rowSums(dt)/6

complete<-setnames(complete, old = c('1.x','2.x','3.x','4.x', '5.x','6.x', '1.y','2.y','3.y','4.y', '5.y','6.y'), new = c('block_1','block_2','block_3','block_4', 'block_5','block_6', 'block_1_strat','block_2_strat','block_3_strat','block_4_strat', 'block_5_strat','block_6_strat'))

complete$Ethnicity[complete$Ethnicity == 'South Asian'] <-'Asian or Pacific Islander'

complete$EC_level <- as.factor(ifelse(complete$EC_score<=median(complete$EC_score),"low","high"))

complete$NA_level <- as.factor(ifelse(complete$NA_score<=median(complete$NA_score),"low","high"))

complete$EX_level <- as.factor(ifelse(complete$EX_score<=median(complete$EX_score),"low","high"))

Pie chart of ethnicity

library(RColorBrewer)
mytable <- table(complete$Ethnicity)
lbls <- paste(names(mytable), " - ", mytable, sep="")
coul <- brewer.pal(9, "Set3") 
pie(mytable, labels = lbls,col=coul, cex = 1.2,
   main="")

Compare temperament scores across largest ethnicity groups

race_data<-complete[complete$Ethnicity=='White or Caucasian'|complete$Ethnicity=='Asian or Pacific Islander',]
table(race_data$Ethnicity)
## 
## Asian or Pacific Islander        White or Caucasian 
##                        81                        91
t.test(race_data$NA_score~race_data$Ethnicity)
## 
##  Welch Two Sample t-test
## 
## data:  race_data$NA_score by race_data$Ethnicity
## t = 0.48583, df = 169.94, p-value = 0.6277
## alternative hypothesis: true difference in means between group Asian or Pacific Islander and group White or Caucasian is not equal to 0
## 95 percent confidence interval:
##  -0.1422139  0.2350670
## sample estimates:
## mean in group Asian or Pacific Islander        mean in group White or Caucasian 
##                                4.510602                                4.464176
t.test(race_data$EX_score~race_data$Ethnicity)
## 
##  Welch Two Sample t-test
## 
## data:  race_data$EX_score by race_data$Ethnicity
## t = -1.8802, df = 158.96, p-value = 0.06192
## alternative hypothesis: true difference in means between group Asian or Pacific Islander and group White or Caucasian is not equal to 0
## 95 percent confidence interval:
##  -0.45175103  0.01111316
## sample estimates:
## mean in group Asian or Pacific Islander        mean in group White or Caucasian 
##                                4.910184                                5.130503
t.test(race_data$EC_score~race_data$Ethnicity)
## 
##  Welch Two Sample t-test
## 
## data:  race_data$EC_score by race_data$Ethnicity
## t = -0.92449, df = 168.64, p-value = 0.3566
## alternative hypothesis: true difference in means between group Asian or Pacific Islander and group White or Caucasian is not equal to 0
## 95 percent confidence interval:
##  -0.3000075  0.1086384
## sample estimates:
## mean in group Asian or Pacific Islander        mean in group White or Caucasian 
##                                3.750157                                3.845842
summary(complete[,c("NA_score","EX_score","EC_score")])
##     NA_score        EX_score        EC_score    
##  Min.   :2.400   Min.   :2.529   Min.   :1.684  
##  1st Qu.:4.040   1st Qu.:4.544   1st Qu.:3.329  
##  Median :4.480   Median :5.047   Median :3.789  
##  Mean   :4.487   Mean   :5.018   Mean   :3.774  
##  3rd Qu.:4.910   3rd Qu.:5.588   3rd Qu.:4.211  
##  Max.   :6.520   Max.   :6.588   Max.   :5.368

Compare temperament scores across genders

gender_data<-complete[complete$Gender=='Female'|complete$Gender=='Male',]
table(gender_data$Gender)
## 
## Female   Male 
##    134     64
t.test(gender_data$NA_score~gender_data$Gender)
## 
##  Welch Two Sample t-test
## 
## data:  gender_data$NA_score by gender_data$Gender
## t = 4.961, df = 123.02, p-value = 2.277e-06
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
##  0.2753013 0.6408409
## sample estimates:
## mean in group Female   mean in group Male 
##             4.634760             4.176689
t.test(gender_data$EX_score~gender_data$Gender)
## 
##  Welch Two Sample t-test
## 
## data:  gender_data$EX_score by gender_data$Gender
## t = -0.64495, df = 127.68, p-value = 0.5201
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
##  -0.2957846  0.1503658
## sample estimates:
## mean in group Female   mean in group Male 
##             4.994386             5.067096
t.test(gender_data$EC_score~gender_data$Gender)
## 
##  Welch Two Sample t-test
## 
## data:  gender_data$EC_score by gender_data$Gender
## t = -0.97582, df = 133.6, p-value = 0.3309
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
##  -0.29417471  0.09980038
## sample estimates:
## mean in group Female   mean in group Male 
##             3.742648             3.839836

Plot different temperament profile’s category learning performance across 6 blocks

tes<-res[c(res$block == '1'| res$block=='6'),c(1,2,16)]
tes<-reshape(tes, idvar = "UniqueID", timevar = "block", direction = "wide")
test<-merge(tes,complete, by='UniqueID')

new_complete<-test[,c(1:11, 14:20,22,23,30,33:45)]
new_complete$early_strat[new_complete$winner.BIC.1 == 'Conjunctive Rule'] <-'CR'
new_complete$early_strat[new_complete$winner.BIC.1 == 'Information Integration'] <-'II'
new_complete$early_strat[new_complete$winner.BIC.1 == 'Length'] <-'1DR'
new_complete$early_strat[new_complete$winner.BIC.1 == 'Orientation'] <-'1DR'
new_complete$early_strat[new_complete$winner.BIC.1 == 'Fixed Random'] <-'Rand'

new_complete$final_strat[new_complete$winner.BIC.6 == 'Conjunctive Rule'] <-'CR'
new_complete$final_strat[new_complete$winner.BIC.6 == 'Information Integration'] <-'II'
new_complete$final_strat[new_complete$winner.BIC.6 == 'Length'] <-'1DR'
new_complete$final_strat[new_complete$winner.BIC.6 == 'Orientation'] <-'1DR'
new_complete$final_strat[new_complete$winner.BIC.6 == 'Fixed Random'] <-'Rand'

new_complete[, "max.block"] <- apply(new_complete[, 22:27], 1, max)

Remove bad participants

new_complete<-new_complete[!new_complete$max.block < 0.60,]
new_complete<-new_complete[!new_complete$exp_isCorrect<.80,]
new_complete<-new_complete[!new_complete$intrusion_error>50,]
new_complete<-new_complete[!new_complete$numberWords_recalled>60,]
198 - nrow(new_complete) # number removed
## [1] 38
table(new_complete$Temp_profile) #get number participants per temperament profile
## < table of extent 0 >

get descriptives for predictors

x<-new_complete[,c(12:14,19)]
sapply(x, function(x) c( "Stand dev" = sd(x), 
                         "Mean"= mean(x,na.rm=TRUE),
                         "Median" = median(x),
                         "Minimum" = min(x),
                         "Maximun" = max(x),
                         "Upper Quantile" = quantile(x,1),
                         "LowerQuartile" = quantile(x,0)
                    )
)
##                      NA_score  EC_score  EX_score numberWords_recalled
## Stand dev           0.6224513 0.7081388 0.7538696             8.447281
## Mean                4.4987867 3.7891522 5.0255140            46.543750
## Median              4.4800000 3.7894737 5.0465357            47.500000
## Minimum             2.8800000 1.6842105 2.5294118            25.000000
## Maximun             6.5200000 5.3684211 6.5882353            60.000000
## Upper Quantile.100% 6.5200000 5.3684211 6.5882353            60.000000
## LowerQuartile.0%    2.8800000 1.6842105 2.5294118            25.000000
#this block of code create the dataframe for extracting best performing block for each participant and return the strategy being used in that block
long_strat1<-gather(new_complete[,c(1,12:14,19,22:27)], block, block_perf, block_1:block_6, factor_key = TRUE)
long_strat2<-gather(new_complete[,c(1,28:33)], block, block_strat, block_1_strat:block_6_strat, factor_key = TRUE)

long_strat1$block<- as.character(long_strat1$block)
long_strat2$block<- as.character(long_strat2$block)

long_strat2$block[long_strat2$block == "block_1_strat"] <- "block_1"
long_strat2$block[long_strat2$block == "block_2_strat"] <- "block_2"
long_strat2$block[long_strat2$block == "block_3_strat"] <- "block_3"
long_strat2$block[long_strat2$block == "block_4_strat"] <- "block_4"
long_strat2$block[long_strat2$block == "block_5_strat"] <- "block_5"
long_strat2$block[long_strat2$block == "block_6_strat"] <- "block_6"

long_strat<-merge(long_strat1,long_strat2,by=c('UniqueID','block'))

long_strat$block_strat[long_strat$block_strat=='Information Integration']<-'II'
long_strat$block_strat[long_strat$block_strat=='Conjunctive Rule']<-'CR'
long_strat$block_strat[long_strat$block_strat=='Orientation']<-'simple_rule'
long_strat$block_strat[long_strat$block_strat=='Length']<-'simple_rule'
long_strat$block_strat[long_strat$block_strat=='Fixed Random']<-'Random'

require(data.table) ## 1.9.2
group <- as.data.table(long_strat)
df_bestBlock_strat<-group[group[, .I[which.max(block_perf)], by=UniqueID]$V1]
# strategy multinomial logistic regression
require(foreign)
## Loading required package: foreign
require(nnet)
## Loading required package: nnet
require(ggplot2)
require(reshape2)
## Loading required package: reshape2
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:reshape':
## 
##     colsplit, melt, recast
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(mlogit)
## Loading required package: dfidx
## 
## Attaching package: 'dfidx'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate
## The following object is masked from 'package:stats':
## 
##     filter
# df_bestBlock_strat <-df_bestBlock_strat[!df_bestBlock_strat$block_strat=='Random',]
# 
# df_bestBlock_strat$block_strat <- relevel(factor(df_bestBlock_strat$block_strat), ref = "simple_rule")
# test <- multinom(block_strat ~ NA_score + EC_score + EX_score + numberWords_recalled, data = df_bestBlock_strat)


ggplot(new_complete, aes(x = NA_score, y = block_1, color = early_strat)) +
  geom_point(size = 4)+ xlab('Negative Affect') + ylab('First Block Performance') + theme_classic()+theme(
        legend.position = "none",
        text = element_text(color = "black"), 
        axis.text = element_text(colour = "black"), 
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size=20),
        axis.title = element_text(size=20)
        )+ theme(legend.position="none")

ggplot(new_complete, aes(x = EC_score, y = block_1, color = early_strat)) +
  geom_point(size = 4)+ xlab('Effortful Control') + ylab(' ') + theme_classic()+ theme_classic()+theme(
        legend.position = "none",
        text = element_text(color = "black"), 
        axis.text = element_text(colour = "black"), 
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size=20),
        axis.title = element_text(size=20)
        )+ theme(legend.position="none")

ggplot(new_complete, aes(x = EX_score, y = block_1, color = early_strat)) +
  geom_point(size = 4)+ xlab('Extraversion') + ylab('First Block Performance') +  theme_classic()+theme(
        legend.position = "none",
        text = element_text(color = "black"), 
        axis.text = element_text(colour = "black"), 
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size=20),
        axis.title = element_text(size=20)
        )+ theme(legend.position="none")

ggplot(new_complete, aes(x = numberWords_recalled, y = block_1, color = early_strat)) +
  geom_point(size = 4)+ xlab('Working Memory') + ylab(' ') +  theme_classic()+theme(
        legend.position = "none",
        text = element_text(color = "black"), 
        axis.text = element_text(colour = "black"), 
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size=12),
        axis.title = element_text(size=20)
        )+ theme(legend.position="none")

table(new_complete$early_strat)
## 
##  1DR   CR   II Rand 
##   74    8   57   21
require(foreign)
require(nnet)
require(ggplot2)
require(reshape2)

a<-new_complete
a$early_strat[a$early_strat=='CR']  <- "1DR" 
a$early_strat <- relevel(factor(a$early_strat), ref = "Rand")
test <- multinom(early_strat ~ NA_score + EC_score + EX_score + numberWords_recalled, data = a)
## # weights:  18 (10 variable)
## initial  value 175.777966 
## iter  10 value 154.151277
## iter  20 value 153.224093
## final  value 153.224068 
## converged
summary(test)
## Call:
## multinom(formula = early_strat ~ NA_score + EC_score + EX_score + 
##     numberWords_recalled, data = a)
## 
## Coefficients:
##     (Intercept)  NA_score   EC_score  EX_score numberWords_recalled
## 1DR   -5.561808 0.4277482 0.13778767 0.5742839           0.03586684
## II    -4.512552 0.3822580 0.08435982 0.2849475           0.04550480
## 
## Std. Errors:
##     (Intercept)  NA_score  EC_score  EX_score numberWords_recalled
## 1DR    4.063660 0.4888338 0.3833742 0.3488403           0.02810203
## II     4.181282 0.5058584 0.3959760 0.3570930           0.02954682
## 
## Residual Deviance: 306.4481 
## AIC: 326.4481
z <- summary(test)$coefficients/summary(test)$standard.errors
p <- (1 - pnorm(abs(z), 0, 1)) * 2
p
##     (Intercept)  NA_score  EC_score   EX_score numberWords_recalled
## 1DR   0.1711026 0.3815532 0.7192901 0.09970901            0.2018467
## II    0.2804865 0.4498518 0.8312936 0.42489107            0.1235381

descriptives of the predictors

setnames(new_complete, old = c('NA_score','EC_score','EX_score','numberWords_recalled'), new = c('negative_affect','effortful_control','extraversion','Rspan_score'))

summary(new_complete[,c("negative_affect","extraversion","effortful_control","Rspan_score")])
##  negative_affect  extraversion   effortful_control  Rspan_score   
##  Min.   :2.880   Min.   :2.529   Min.   :1.684     Min.   :25.00  
##  1st Qu.:4.040   1st Qu.:4.644   1st Qu.:3.316     1st Qu.:42.00  
##  Median :4.480   Median :5.047   Median :3.789     Median :47.50  
##  Mean   :4.499   Mean   :5.026   Mean   :3.789     Mean   :46.54  
##  3rd Qu.:4.890   3rd Qu.:5.603   3rd Qu.:4.316     3rd Qu.:53.00  
##  Max.   :6.520   Max.   :6.588   Max.   :5.368     Max.   :60.00
sd(new_complete$negative_affect)
## [1] 0.6224513
sd(new_complete$extraversion)
## [1] 0.7538696
sd(new_complete$effortful_control)
## [1] 0.7081388
hist(new_complete$avg_cat_perf, col='yellow', main=' ', xlab='Average II Task Performance', xlim=c(0.5,0.95),breaks=10, ylim=c(0,50), labels=T, cex.lab = 1.2)

# main="Distribution of participants' average performance in the II Task",

All participants Analysis

Check correlation matrix among predictors

new_complete.corr<- rcorr(as.matrix(new_complete[,c(12:14,19)]))
new_complete.r<-new_complete.corr$r
new_complete.p<-new_complete.corr$P
new_complete.corr
##                   negative_affect effortful_control extraversion Rspan_score
## negative_affect              1.00             -0.40        -0.40        0.02
## effortful_control           -0.40              1.00         0.23       -0.02
## extraversion                -0.40              0.23         1.00        0.01
## Rspan_score                  0.02             -0.02         0.01        1.00
## 
## n= 160 
## 
## 
## P
##                   negative_affect effortful_control extraversion Rspan_score
## negative_affect                   0.0000            0.0000       0.8204     
## effortful_control 0.0000                            0.0035       0.7885     
## extraversion      0.0000          0.0035                         0.8512     
## Rspan_score       0.8204          0.7885            0.8512
new_complete.p
##                   negative_affect effortful_control extraversion Rspan_score
## negative_affect                NA      2.050147e-07 1.725617e-07   0.8203587
## effortful_control    2.050147e-07                NA 3.519200e-03   0.7884998
## extraversion         1.725617e-07      3.519200e-03           NA   0.8512064
## Rspan_score          8.203587e-01      7.884998e-01 8.512064e-01          NA
df_for_tables<- new_complete

df.corr<- rcorr(as.matrix(df_for_tables[,c(12:14, 19)]))
df.corr$r<-round(df.corr$r,3)
df.corr$p<-round(df.corr$P, 5)
df.corr$r[upper.tri(df.corr$r)] <- "-"
df.corr$p[upper.tri(df.corr$r)] <- "-"
df.corr$r
##                   negative_affect effortful_control extraversion Rspan_score
## negative_affect   "1"             "-"               "-"          "-"        
## effortful_control "-0.397"        "1"               "-"          "-"        
## extraversion      "-0.399"        "0.229"           "1"          "-"        
## Rspan_score       "0.018"         "-0.021"          "0.015"      "1"

correlation table in R, not sig. level

library(kableExtra)
# print tables using kable
kable(as.data.frame(format(new_complete.corr$r, scientific = FALSE)), caption = "II Task Predictors - Correlation: r values") %>%
  kable_styling(bootstrap_options = c("hover", "striped"), full_width = F) %>% 
  kable_classic()
II Task Predictors - Correlation: r values
negative_affect effortful_control extraversion Rspan_score
negative_affect 1.00000000 -0.39680392 -0.39904074 0.01809293
effortful_control -0.39680392 1.00000000 0.22942694 -0.02137291
extraversion -0.39904074 0.22942694 1.00000000 0.01494581
Rspan_score 0.01809293 -0.02137291 0.01494581 1.00000000
d_long<-gather(new_complete, block, block_perf, block_1:block_6, factor_key = TRUE)
boxplot(block_perf ~ block, col=c('GREY'), ylab='block performance', d_long)

Main multiple regression

Full Model

mod1<-lm(avg_cat_perf~ negative_affect+extraversion+effortful_control+negative_affect*effortful_control+extraversion*effortful_control+Rspan_score, new_complete)


# standardized beta
mod11<- lm(scale(avg_cat_perf)~scale(negative_affect)+scale(extraversion)+scale(effortful_control)+scale(negative_affect*effortful_control)+scale(extraversion*effortful_control)+scale(Rspan_score), new_complete)

summary(mod1)
## 
## Call:
## lm(formula = avg_cat_perf ~ negative_affect + extraversion + 
##     effortful_control + negative_affect * effortful_control + 
##     extraversion * effortful_control + Rspan_score, data = new_complete)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30316 -0.04423  0.02815  0.06790  0.14805 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                        1.07479    0.46141   2.329   0.0211 *
## negative_affect                   -0.06416    0.06623  -0.969   0.3342  
## extraversion                      -0.01518    0.05550  -0.274   0.7848  
## effortful_control                 -0.11593    0.11828  -0.980   0.3286  
## Rspan_score                        0.00220    0.00092   2.391   0.0180 *
## negative_affect:effortful_control  0.01561    0.01716   0.910   0.3642  
## extraversion:effortful_control     0.00850    0.01406   0.604   0.5464  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09745 on 153 degrees of freedom
## Multiple R-squared:  0.06351,    Adjusted R-squared:  0.02678 
## F-statistic: 1.729 on 6 and 153 DF,  p-value: 0.1177
summary(mod11)
## 
## Call:
## lm(formula = scale(avg_cat_perf) ~ scale(negative_affect) + scale(extraversion) + 
##     scale(effortful_control) + scale(negative_affect * effortful_control) + 
##     scale(extraversion * effortful_control) + scale(Rspan_score), 
##     data = new_complete)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0691 -0.4477  0.2850  0.6874  1.4988 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                 5.014e-16  7.799e-02   0.000
## scale(negative_affect)                     -4.043e-01  4.174e-01  -0.969
## scale(extraversion)                        -1.159e-01  4.235e-01  -0.274
## scale(effortful_control)                   -8.311e-01  8.479e-01  -0.980
## scale(negative_affect * effortful_control)  5.043e-01  5.542e-01   0.910
## scale(extraversion * effortful_control)     4.371e-01  7.231e-01   0.604
## scale(Rspan_score)                          1.881e-01  7.867e-02   2.391
##                                            Pr(>|t|)  
## (Intercept)                                   1.000  
## scale(negative_affect)                        0.334  
## scale(extraversion)                           0.785  
## scale(effortful_control)                      0.329  
## scale(negative_affect * effortful_control)    0.364  
## scale(extraversion * effortful_control)       0.546  
## scale(Rspan_score)                            0.018 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9865 on 153 degrees of freedom
## Multiple R-squared:  0.06351,    Adjusted R-squared:  0.02678 
## F-statistic: 1.729 on 6 and 153 DF,  p-value: 0.1177
library(broom)
library(DescTools)
## 
## Attaching package: 'DescTools'
## The following object is masked from 'package:car':
## 
##     Recode
## The following objects are masked from 'package:Hmisc':
## 
##     %nin%, Label, Mean, Quantile
## The following object is masked from 'package:data.table':
## 
##     %like%
mod1_coef <- cbind(tidy(mod1)[,1:3], 
                  tidy(mod11)[c(1:4,7,5,6),2:3], 
                  tidy(mod1)[,4:5])
  # remove rownames
  rownames(mod1_coef) <- c(1:nrow(mod1_coef))
  # rename cols
  colnames(mod1_coef) <- c("term", "beta", "beta_SE", "std_beta", "std_beta_SE", "t_val", "p_val")

# finally, create df
mod1_t <- 
      rbind(mod1_coef,
      tibble("term" = c("f_stat", "R2/Adj.R2", "N"), 
             "beta" = c(summary(mod1)$fstatistic[2], summary(mod1)$r.squared, nrow(mod1$model)), 
             "beta_SE" = c(summary(mod1)$fstatistic[3], summary(mod1)$adj.r.squared, NA), 
             "std_beta" = c(summary(mod1)$fstatistic[1], rep(NA, 2)), 
             "std_beta_SE" = c(pf(summary(mod1)$fstatistic[1], summary(mod1)$fstatistic[2], summary(mod1)$fstatistic[3], lower.tail = F), rep(NA, 2)), 
             "t_val" = rep(NA, 3), 
             "p_val" = rep(NA, 3)
             )
      )

# remove NA from kable tables
options(knitr.kable.NA = '') 

# show kable table
kable(mod1_t, caption = "Multiple Regression Model Predicting II overall average performance", align = rep('lcccc'), digits = 3, row.names = T, col.names = c("", "Beta", "Beta's SE", "Std. Beta", "Std. Beta's SE",  "t-value", "p-value")) %>% 
  kable_paper(full_width = F) %>%
  # add_header_above(c(" ", "mturk" = 2, "Mturk" = 2)) %>%
  # pack_rows("Criterion(s)", 1, 6) %>%
  # # column_spec(1, width = "10em") %>% 
  # pack_rows("Predictor(s)", 7, 19) %>%
  footnote(general = "...") %>% 
  column_spec(c(4), border_left = T) %>% 
  kable_classic()
Multiple Regression Model Predicting II overall average performance
Beta Beta’s SE Std. Beta Std. Beta’s SE t-value p-value
1 (Intercept) 1.075 0.461 0.000 0.078 2.329 0.021
2 negative_affect -0.064 0.066 -0.404 0.417 -0.969 0.334
3 extraversion -0.015 0.055 -0.116 0.424 -0.274 0.785
4 effortful_control -0.116 0.118 -0.831 0.848 -0.980 0.329
5 Rspan_score 0.002 0.001 0.188 0.079 2.391 0.018
6 negative_affect:effortful_control 0.016 0.017 0.504 0.554 0.910 0.364
7 extraversion:effortful_control 0.009 0.014 0.437 0.723 0.604 0.546
8 f_stat 6.000 153.000 1.729 0.118
9 R2/Adj.R2 0.064 0.027
10 N 160.000
Note:
…
# write.csv(mod1_t,"db_II_reg.csv", row.names = T) #need to check row order across models

Scatterplot of correlational relationship between each predictor and DV

library(tidyr)
predictor_corr_data<-gather(new_complete, predictor, Score, c(12:14,19), factor_key = TRUE)
reg_colours <- c("#414535", "#08605F", "#3F826D", "#858585", "#DDA448", "#F0803C", "#98410B" ) # green > orange

predictor_names <- list(
  'negative_affect'="Negative Affect",
  'effortful_control'="Effortful Control",
  'extraversion'="Extraversion",
  'Rspan_score'="Working Memory"
)

predictor_labeller <- function(variable,value){
  return(predictor_names[value])
}

predictor_scatter <-
  ggplot(predictor_corr_data[,c(30,34,35)], aes(y = avg_cat_perf, x = Score, colour = factor(predictor))) +
  geom_point(shape = 1) + 
  scale_color_manual(values = reg_colours[c(2,2,2,2)]) +
  facet_wrap(predictor ~ ., labeller=predictor_labeller, scales = "free_x")+
  # facet_wrap(~ predictor, ncol = 2, scales = "free_x") +
  geom_smooth(method = "lm") +
  xlab("Predictor Value")+
  ylab("Overall Average II Task Performance") +
  # geom_line(intercept = 83.731, slopr = .5)+
  # add theme
  # font_size(offset.x = 10) +
  # font_size(base.theme = theme_classic()) +
  theme(text = element_text(size=20),
        axis.text.x = element_text(angle=0, hjust=2)) + 
  theme_classic() +
  theme(
        legend.position = "none",
        text = element_text(color = "black"), 
        axis.text = element_text(colour = "black"), 
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size=12),
        axis.title = element_text(size=12)
        )
## Warning: The labeller API has been updated. Labellers taking `variable` and
## `value` arguments are now deprecated. See labellers documentation.
# show fig
predictor_scatter  
## `geom_smooth()` using formula 'y ~ x'

options(knitr.kable.NA = NA) 

reg_colours <- c("#414535", "#08605F", "#3F826D", "#858585", "#DDA448", "#F0803C", "#98410B" ) # green > orange

# plot lm
  # plot standardized betas to compare cooefficients
mod1_reg_plot <- 
  sjPlot::plot_model(model = mod11, # lm output 
              title = "", # to remove title
                     # type = "std", # type "std" = standardized betas
                     #sort.est = T, # sort based on betas (highest on top)
                     # wrap.title = 55, # char length of each title line
                     #axis.lim = c(-.25,.25)
                     # grid.breaks = .25, # x-axis breaks 
                     #se = T # use se instead of CI
                     colors = reg_colours,
                     show.values = T, # show betas
                     show.p = T, # show * for sig p
                     p.threshold = c(.049, .009, .0009), # state up to 3 cut-offs for pvals, the number is inclusive**
                     value.offset = 0.4, # offset to beta labels
                     value.size = 3, # size of beta labels
                     digits = 3, # num decimals in beta labels
                     dot.size = 2.5, line.size = 1, 
                     vline.color = "#F98B99",
                     # group.terms = c(1, 2, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7),
                      axis.labels = c("Working Memory", "Extraversion x Effortful Control", "Negative Affect x Effortful Control", "Effortful Control", "Extraversion", "Negative Affect"), 
                     axis.title = "", 
                     width = 0.2, transform = NULL
                     ) +
    # font_size(offset.x = 10) +
    # font_size(base.theme = theme_classic()) +
    theme(text = element_text(color = "black"), 
          axis.text = element_text(colour = "black", size = 12), 
          axis.ticks.y = element_blank(),
          )

# show plot
mod1_reg_plot

Reduced model

mod1_reduce<-step(mod1)
mod11_reduce<-step(mod11)

Reducted model results

summary(mod1_reduce)
## 
## Call:
## lm(formula = avg_cat_perf ~ extraversion + Rspan_score, data = new_complete)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30340 -0.04458  0.02921  0.06810  0.15011 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.6042642  0.0661876   9.130 3.24e-16 ***
## extraversion 0.0180551  0.0101587   1.777   0.0775 .  
## Rspan_score  0.0022414  0.0009066   2.472   0.0145 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09656 on 157 degrees of freedom
## Multiple R-squared:  0.05652,    Adjusted R-squared:  0.0445 
## F-statistic: 4.702 on 2 and 157 DF,  p-value: 0.01039
mod11_reduce<- lm(scale(avg_cat_perf)~scale(extraversion)+scale(Rspan_score), new_complete)
summary(mod11_reduce)
## 
## Call:
## lm(formula = scale(avg_cat_perf) ~ scale(extraversion) + scale(Rspan_score), 
##     data = new_complete)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0715 -0.4513  0.2957  0.6894  1.5196 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         -4.130e-17  7.728e-02   0.000   1.0000  
## scale(extraversion)  1.378e-01  7.753e-02   1.777   0.0775 .
## scale(Rspan_score)   1.917e-01  7.753e-02   2.472   0.0145 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9775 on 157 degrees of freedom
## Multiple R-squared:  0.05652,    Adjusted R-squared:  0.0445 
## F-statistic: 4.702 on 2 and 157 DF,  p-value: 0.01039

Reduced model stats table

mod1_reduce_coef <- cbind(tidy(mod1_reduce)[,1:3], 
                  tidy(mod11_reduce)[,2:3], 
                  tidy(mod1_reduce)[,4:5])
  # remove rownames
  rownames(mod1_reduce_coef) <- c(1:nrow(mod1_reduce_coef))
  # rename cols
  colnames(mod1_reduce_coef) <- c("term", "beta", "beta_SE", "std_beta", "std_beta_SE", "t_val", "p_val")

# finally, create df
mod1_reduce_t <- 
      rbind(mod1_reduce_coef,
      tibble("term" = c("f_stat", "R2/Adj.R2", "N"), 
             "beta" = c(summary(mod1_reduce)$fstatistic[2], summary(mod1_reduce)$r.squared, nrow(mod1_reduce$model)), 
             "beta_SE" = c(summary(mod1_reduce)$fstatistic[3], summary(mod1_reduce)$adj.r.squared, NA), 
             "std_beta" = c(summary(mod1_reduce)$fstatistic[1], rep(NA, 2)), 
             "std_beta_SE" = c(pf(summary(mod1_reduce)$fstatistic[1], summary(mod1_reduce)$fstatistic[2], summary(mod1_reduce)$fstatistic[3], lower.tail = F), rep(NA, 2)), 
             "t_val" = rep(NA, 3), 
             "p_val" = rep(NA, 3)
             )
      )

# remove NA from kable tables
options(knitr.kable.NA = '') 

# show kable table
kable(mod1_reduce_t, caption = "Reduced multiple Regression Model Predicting CR overall average performance", align = rep('lcccc'), digits = 3, row.names = T, col.names = c("", "Beta", "Beta's SE", "Std. Beta", "Std. Beta's SE",  "t-value", "p-value")) %>% 
  kable_paper(full_width = F) %>%
  # add_header_above(c(" ", "mturk" = 2, "Mturk" = 2)) %>%
  # pack_rows("Criterion(s)", 1, 6) %>%
  # # column_spec(1, width = "10em") %>% 
  # pack_rows("Predictor(s)", 7, 19) %>%
  footnote(general = "...") %>% 
  column_spec(c(4), border_left = T) %>% 
  kable_classic()
Reduced multiple Regression Model Predicting CR overall average performance
Beta Beta’s SE Std. Beta Std. Beta’s SE t-value p-value
1 (Intercept) 0.604 0.066 0.000 0.077 9.130 0.000
2 extraversion 0.018 0.010 0.138 0.078 1.777 0.077
3 Rspan_score 0.002 0.001 0.192 0.078 2.472 0.014
4 f_stat 2.000 157.000 4.702 0.010
5 R2/Adj.R2 0.057 0.044
6 N 160.000
Note:
…
# write.csv(mod1_reduce_t,"db_II_reg_reduce.csv", row.names = T)
options(knitr.kable.NA = NA) 

reg_colours <- c("#414535", "#08605F", "#3F826D", "#858585", "#DDA448", "#F0803C", "#98410B" ) # green > orange

# plot lm
  # plot standardized betas to compare cooefficients
mod1_reg_plot <- 
  sjPlot::plot_model(model = mod11_reduce, # lm output 
              title = "", # to remove title
                     # type = "std", # type "std" = standardized betas
                     #sort.est = T, # sort based on betas (highest on top)
                     # wrap.title = 55, # char length of each title line
                     #axis.lim = c(-.25,.25)
                     # grid.breaks = .25, # x-axis breaks 
                     #se = T # use se instead of CI
                     colors = reg_colours,
                     show.values = T, # show betas
                     show.p = T, # show * for sig p
                     p.threshold = c(.049, .009, .0009), # state up to 3 cut-offs for pvals, the number is inclusive**
                     value.offset = 0.4, # offset to beta labels
                     value.size = 3, # size of beta labels
                     digits = 3, # num decimals in beta labels
                     dot.size = 2.5, line.size = 1, 
                     vline.color = "#F98B99",
                     # group.terms = c(1, 2, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7),
                     axis.labels = c("Working Memory", "Extraversion"), 
                     axis.title = "", 
                     width = 0.2, transform = NULL
                     ) +
    # font_size(offset.x = 10) +
    # font_size(base.theme = theme_classic()) +
    theme(text = element_text(color = "black"), 
          axis.text = element_text(colour = "black", size = 12), 
          axis.ticks.y = element_blank(),
          )

# show plot
mod1_reg_plot

Block performance

Boxplot of block performance distribution with line connecting mean

# boxplot of block performance
library(tidyr)
d_long<-gather(new_complete, block, block_perf, block_1:block_6, factor_key = TRUE)

library(stringr)

df_mean <- d_long %>% 
  group_by(block) %>% 
  dplyr::summarize(average = mean(block_perf)) %>%
  ungroup()

df_mean22<-df_mean %>%
   dplyr::mutate(block = str_extract(block, "[^_]+$"))

d_long22<-d_long %>%
   dplyr::mutate(block = str_extract(block, "[^_]+$"))

geom_path(data = d_long22, aes(x = x,y = y), inherit.aes = FALSE )
## mapping: x = ~x, y = ~y 
## geom_path: lineend = butt, linejoin = round, linemitre = 10, arrow = NULL, na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
ggplot(d_long22, aes(x = block, y = block_perf)) + ylab('Proportion Correct') + xlab('Block')+
geom_boxplot(fill='gray85', color="gray2") + theme_classic() + ylim(0.4, 1.0) +
  stat_boxplot(geom='errorbar', width=0.3) + scale_y_continuous(breaks = seq(0.4, 1.0, by = 0.1)) +
geom_point(data = df_mean22, size = 2,
             mapping = aes(x = block, y = average),
             color="black") +
geom_line(data = df_mean22, size=1,
            mapping = aes(x = block, y = average, group=1), color = 'gray10')+
theme(axis.text=element_text(size=12),
        axis.title=element_text(size=16))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

Tukey plot comparing every 2 blocks

block_m<-aov(block_perf~block, d_long)
summary(block_m)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## block         5  1.027 0.20532   15.09 2.78e-14 ***
## Residuals   954 12.982 0.01361                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# library(lme4)
# library(lmerTest)
# block_m1<-lmer(block_perf~block+(1|UniqueID), d_long)
# summary(block_m1)
tuk<-TukeyHSD(block_m)

d_long$block<- as.character(d_long$block)


tuk_plot <- function (x, xlab, ylab, ylabels = NULL, ...) {
  for (i in seq_along(x)) {
    xi <- x[[i]][, -4L, drop = FALSE]
    yvals <- nrow(xi):1L
    dev.hold()
    on.exit(dev.flush())
    plot(c(xi[, "lwr"], xi[, "upr"]), rep.int(yvals, 2L), 
         type = "n", axes = FALSE, xlab = "", ylab = "", main = NULL, 
         ...)
    axis(1, ...)
    # change for custom axis labels
    if (is.null(ylabels)) ylabels <- dimnames(xi)[[1L]]

    axis(2, at = nrow(xi):1, labels = ylabels, 
         srt = 0, ...)
    abline(h = yvals, lty = 1, lwd = 1, col = "lightgray")
    abline(v = 0, lty = 2, lwd = 1, ...)
    segments(xi[, "lwr"], yvals, xi[, "upr"], yvals, ...)
    segments(as.vector(xi), rep.int(yvals - 0.1, 3L), as.vector(xi), 
             rep.int(yvals + 0.1, 3L), ...)
    title(main ="", 
          # change for custom axis titles
          xlab = xlab, ylab = ylab)

    box()
    dev.flush()
    on.exit()
  }
}


with(par(mai=c(0.7,0.8,0.5,0.5), cex.lab=1.2, cex.axis=1),{tuk_plot(tuk,  " ", "Block",c("2-1","3-1","4-1","5-1","6-1","3-2","4-2","5-2","6-2","4-3","5-3","6-3","5-4","6-4","6-5"),las=2)})