library(tidyverse)
library(zoo)
library(knitr)
library(tibble)
library(plyr)
library(dplyr)
library(data.table)
library(Hmisc)
library(grt)
library(reshape) #reshape data from long to wide
library(MASS) #backward elimination of non-significant predictors in linear models
library(ggcorrplot)
library(nnet)
library(ggplot2)
library(ltm) #cronbach alpha
#need this to work on Mac
logLik.grg <- function(object, ...)
{
  val <- object$logLik
  class(val) <- "logLik"
  val
}

Temperament Data

df <- read.csv("exp_qualtrics_CR_2.csv", header=T)
df[13:74]<-df[13:74] %>% mutate_all(~ifelse(is.na(.x), mean(.x, na.rm = TRUE), .x)) #fill na with col average
colnames(df)[which(names(df) == "PA.3_1")] <- "EX.3_1"
colnames(df)[which(names(df) == "EX.40_1")] <- "EC.40_1"

cols.num <- c(13:74) #find ATQ columns
df[cols.num] <- sapply(df[cols.num],as.numeric) #convert to numerics
#sapply(df, class)
indx <- grepl('R', colnames(df)) #find reverse coded items
df[,indx]<-abs(df[,indx]-8) #reverse coding
NA_indx <- grepl('NA', colnames(df)) #find Neg_Affect items
NA_sum <- rowSums(df[, NA_indx]) #total score of NA items for each participant
df$NA_score <- NA_sum/25 #append average NA scores to df
cronbach.alpha(df[,NA_indx])
## 
## Cronbach's alpha for the 'df[, NA_indx]' data-set
## 
## Items: 26
## Sample units: 284
## alpha: 0.791
EC_indx <- grepl('EC', colnames(df)) #find EC items
EC_sum <- rowSums(df[, EC_indx]) #total score of EC items for each participant
df$EC_score <- EC_sum/19 #append EC scores to df
cronbach.alpha(df[,EC_indx])
## 
## Cronbach's alpha for the 'df[, EC_indx]' data-set
## 
## Items: 17
## Sample units: 284
## alpha: 0.849
EX_indx <- grepl('EX', colnames(df)) #find EX items
EX_sum <- rowSums(df[, EX_indx]) #total score of EX items for each participant
df$EX_score <- EX_sum/17 #append EX scores to df
cronbach.alpha(df[,EX_indx])
## 
## Cronbach's alpha for the 'df[, EX_indx]' data-set
## 
## Items: 19
## Sample units: 284
## alpha: 0.771
df_temp <-df[,c(1, 3:12, 75:77)]
#generate columns that take temperament factors as low/medium/high levels for potential analyses
df_temp$EC_level <- as.factor(ifelse(df_temp$EC_score<=mean(df_temp$EC_score),"low","high"))

df_temp$NA_level <- as.factor(ifelse(df_temp$NA_score<=mean(df_temp$NA_score),"low","high"))

df_temp$EX_level <- as.factor(ifelse(df_temp$EX_score<=mean(df_temp$EX_score),"low","high"))

Pavlovia Data

setwd( "/home/tzhu/Desktop/decisionBound_data/exp_CR_analysis/completed_exp_CR_data")

csv <- list.files(full.names = TRUE, pattern = '*.csv')

df_Pav <- as_tibble(rbind.fill(lapply (csv, fread)))
#subset dataset

# which(colnames(df_Pav)=="exp_Sentence")
# which(colnames(df_Pav)=="catlearn_corrAns")
cols1 <- c(1, 85:132)
df_Pav<-df_Pav[,cols1]
# which(colnames(df_Pav)=="exp_typedWord")
# which(colnames(df_Pav)=="numberWords_recalled")
# which(colnames(df_Pav)=="key_resp_20.keys")
# which(colnames(df_Pav)=="II_stim_file")
cols2 <- c(1:6,12,20:21,31:33,40:47, 49)
df_Pav<-df_Pav[,cols2] 
#fill empty cells with NA
df_Pav <- df_Pav %>%
mutate_at(vars(colnames(.)),
        .funs = funs(ifelse(.=="", NA, as.character(.))))

#remove empty rows
df_Pav<-df_Pav[!is.na(df_Pav$exp_Sentence) | !is.na(df_Pav$exp_typedWord) | !is.na(df_Pav$key_resp_20.keys),]

Rspan data

# which(colnames(df_Pav)=="sentence_num_total")
df_rspan<-df_Pav[, 1:9] #subset relevant cols
df_rspan<-df_rspan[!is.na(df_rspan$exp_Sentence) | !is.na(df_rspan$exp_typedWord),] #keep rows with either judgment or recall responses
cols4 <- c(4, 6, 8:9)
df_rspan[cols4] <- sapply(df_rspan[cols4],as.numeric)
#keep only judgment related rows
accuracy <- df_rspan %>% 
  filter(!is.na(exp_isCorrect))

#late responses on judgment are filled with rt of 4.5 
accuracy$key_resp_10.rt[is.na(accuracy$key_resp_10.rt)] <- 4.5
#keep only recalled_word related rows
recall_num <- df_rspan %>% 
  filter(!is.na(numberWords_recalled))
recall_num$exp_typedWord<-gsub("comma", " ", recall_num$exp_typedWord, fixed = TRUE) #1 ss separated words with comma, turned words into one long string

recall_num$number_answered<-sapply(strsplit(recall_num$exp_typedWord, " "), length)
recall_num$intrusion_error<-recall_num$number_answered - recall_num$numberWords_recalled #intrusion are the incorrect words typed

RSpan Performance data

#accuracy dataframe
df1<-aggregate(accuracy[, c(4,6)], list(accuracy$UniqueID), mean)#average for each Ss performance
#recalled number dataframe
df2<-aggregate(recall_num[, c(8,11)], list(recall_num$UniqueID), sum)
df_rspan_perf <- merge(df1,df2,by="Group.1")
names(df_rspan_perf)[1] <- "UniqueID"

df_rspan_perf<-df_rspan_perf[!df_rspan_perf$UniqueID=='3103wr',]
df_rspan_perf<-df_rspan_perf[!df_rspan_perf$UniqueID=='680248tt',]
head(df_rspan_perf)

Merge temperament df with Rspan df

df_tem_rspan<-merge(df_temp, df_rspan_perf, by = "UniqueID")
head(df_tem_rspan)

Category learning data

cols3<- c(1, 10:21)
df_catlearn<-df_Pav[, cols3] #remove irrelevant cols
df_catlearn<-df_catlearn[!is.na(df_catlearn$key_resp_20.keys),] #remove empty rows

df_catlearn$block <- rep(1:6, times=198, each = 60)
df_catlearn<- df_catlearn[, c(1, 14, 2:13)]

df_catlearn$key_resp_20.corr<-as.numeric(df_catlearn$key_resp_20.corr)
#describe(df_catlearn)
perf_table<- df_catlearn %>%
    group_by(UniqueID) %>%
    dplyr::summarize(Mean = mean(key_resp_20.corr, na.rm=TRUE))

mean(perf_table$Mean)
## [1] 0.7120276
sd(perf_table$Mean)
## [1] 0.08678907

Category Learning Strategy Fitting

df_catlearn <-(df_catlearn %>% mutate(catlearn_corrAns = case_when(
  catlearn_corrAns == "a" ~ 1,
  catlearn_corrAns == "b" ~ 2))
)

#relabel the key_resp_3.keys as 1 and 2
df_catlearn<-(df_catlearn %>% mutate(key_resp_20.keys = case_when(
  key_resp_20.keys == "a" ~ 1,
  key_resp_20.keys == "b" ~ 2))
)

cols5<-c(4, 5, 7:13)
df_catlearn[cols5] <- sapply(df_catlearn[cols5],as.numeric)
dat<-df_catlearn

#template code for combining blocks
# cols6<-c(1, 3:14)
# dat<-df_catlearn[, cols6]
# dat$block <-rep(1:6, times = 24, each=60)
# source("plot.gcjc.R")

blocks <- levels(factor(dat$block))
sbj <- levels(factor(dat$UniqueID))
models <- c("Orientation", "Length", "Information Integration", "Conjunctive Rule", "Fixed Random", "General Random")
res <- matrix(nrow=length(sbj) * length(blocks),ncol=(length(models) * 2 + 2))
res <- cbind(expand.grid(sbj,blocks), res)
colnames(res)[1:2] <- c("UniqueID","block")
pdf(file="198s_6BLOCK2_cr_individual_plots.pdf", width=12, height=9)
op <- par(mfrow = c(3, 4), pty = "m")

for(i in sbj)
{
  for(j in blocks)
  {
    data <- subset(dat, dat$block == j & dat$UniqueID == i)
    fit.ori <- glc(key_resp_20.keys ~ ori_transform, category=data$catlearn_corrAns, data=data)
    fit.length <- glc(key_resp_20.keys ~ length, category=data$catlearn_corrAns, data=data)
    fit.2d <- glc(key_resp_20.keys ~ length + ori_transform, category=data$catlearn_corrAns, data=data)
    fit.cj <- gcjc(key_resp_20.keys ~ length + ori_transform,
                   category=data$catlearn_corrAns, data=data,
                   equal.noise = TRUE, config = 2,
                   fixed = list(noise = c(FALSE, FALSE), bias = c(FALSE, FALSE)))
    fit.frg <- grg(data$catlearn_corrAns, fixed=T)
    fit.grg <- grg(data$catlearn_corrAns, fixed=F)
    AICs <- AIC(fit.ori, fit.length, fit.2d, fit.cj, fit.frg, fit.grg)
    BICs <- AIC(fit.ori, fit.length, fit.2d, fit.cj, fit.frg, fit.grg, k=log(60)) #AIC can provide BIC, see ?AIC
    winner <- which.min(AICs$AIC)
    winner2 <- which.min(BICs$AIC)
    res[res$UniqueID==i & res$block == j,3:ncol(res)] <- c(round(AICs$AIC,2), models[winner], round(BICs$AIC,2), models[winner2])
  ##### now plot'em ###RACHEL CHANGED 'winner' to 'winner2' to reflect BIC instead of AIC
    # col <- rep("black",4)
    # col[winner2] <- "red"
    # bg <- c("blue","red")[factor(data$key_resp_20.keys)]
    # pch <- c(21,24)[factor(data$catlearn_corrAns)]
    # title <- paste(i, ": block", j, ": ", models[winner2])
    # plot(ori_transform ~ length, data=data, xlab="Length", ylab="Orientation", bg=bg, pch=pch, xlim=c(0,300), ylim=c(0, 110), main=title)
    # abline(h=coef(fit.ori), col = col[1])
    # abline(v=coef(fit.length), col = col[2])
    # abline(coef(fit.2d), col = col[3])
    # cj.coef <- coef(fit.cj)
    # abline(h=cj.coef[["ori_transform"]], col = col[4], lty = 3)
    # abline(v=cj.coef[["length"]], col = col[4], lty = 3)
  }
}

dev.off()
## png 
##   2
res.nam <- c("ori","length","2d","cj","frg","grg","winner")
colnames(res)[3:16] <- c(paste(res.nam, "AIC", sep="."), paste(res.nam, "BIC", sep="."))

category learning Performance by block

catlearn_perf<- dat %>%
    group_by(UniqueID, block) %>%
    dplyr::summarize(Mean = mean(key_resp_20.corr, na.rm=TRUE))

head(catlearn_perf)
wide1<-cast(catlearn_perf, UniqueID~block) 

Descriptives of Participants Strategy Use

# count the number of time each model is used by each participant
model_block <- res %>% 
  group_by(UniqueID, winner.BIC) %>%
  dplyr::summarise(Count = n())

#how many blocks for each Ss was best fitted by CR model
head(model_block[with(model_block, winner.BIC == "Conjunctive Rule" ), ])
# find the first block each S used the optimal CR strategy
first_cr<- res %>% 
  group_by(UniqueID, block, winner.BIC) %>%
  dplyr::summarise(Count = n())

a<-first_cr[first_cr$winner.BIC == "Conjunctive Rule",]
first_cr<-a %>% 
    group_by(UniqueID) %>% 
    slice(which.min(block))

head(first_cr)

Combine Temperament Rspan data with Catlearn data

#merge df_tem_rspan that has temperament scores and rspan performance with catlearn model fit descriptives
b<-merge(first_cr, df_tem_rspan, all = TRUE)
cols<-c(1:2, 5:24)
b<-b[, cols]
colnames(b)[2] <- "first_CR_block"
b[,2] <- as.numeric(as.character(b[,2]))
#reshape model_block from long to wide, this df contains number of blocks best fitted by each strategy
wide<-cast(model_block, UniqueID ~ winner.BIC)
b<-merge(wide,b,all =TRUE)

complete<-setnames(b, old = c('Conjunctive Rule','Fixed Random','Information Integration', 'Length','Orientation'), new = c('total_CR','total_fixedRand','total_II','total_length','total_ori'))

complete<-complete[!is.na(complete$NA_score),]

# generate a column that keep count of the total number of blocks best fitted by rule-based models, regardless of 1d or 2d

#fill NAs with 0 in the block count for each model
complete[, c(2:6)][is.na(complete[, c(2:6)])] <- 0


complete$total_rule<-complete$total_CR + complete$total_length + complete$total_ori
complete$total_Rand<-complete$total_fixedRand 

complete <- complete[,c(1, 11:16,17,8:10,18:27, 2:7, 28:29)]#rearrange cols

# merge with catlearn accuracy performance of each block
complete<-merge(complete, wide1, by = "UniqueID")

block_strat<-cast(res[,c(1,2,16)], UniqueID~block)
complete<-merge(complete, block_strat, by = "UniqueID")

complete<-complete[!is.na(complete$Age), ]
dt<-complete[,30:35]
complete$avg_cat_perf <- rowSums(dt)/6

complete<-setnames(complete, old = c('1.x','2.x','3.x','4.x', '5.x','6.x', '1.y','2.y','3.y','4.y', '5.y','6.y'), new = c('block_1','block_2','block_3','block_4', 'block_5','block_6', 'block_1_strat','block_2_strat','block_3_strat','block_4_strat', 'block_5_strat','block_6_strat'))


complete$Age[complete$Age == 'Uyghurqi'] <-18
#complete$Age<-as.numeric(complete$Age)
complete$Age<-as.numeric(as.character(complete$Age))

complete$EC_level <- as.factor(ifelse(complete$EC_score<=median(complete$EC_score),"low","high"))

complete$NA_level <- as.factor(ifelse(complete$NA_score<=median(complete$NA_score),"low","high"))

complete$EX_level <- as.factor(ifelse(complete$EX_score<=median(complete$EX_score),"low","high"))

Pie chart of ethnicity

complete$Ethnicity[complete$Ethnicity == 'South Asian'] <-'Asian or Pacific Islander'
library(RColorBrewer)
mytable <- table(complete$Ethnicity)
lbls <- paste(names(mytable), " - ", mytable, sep="")
coul <- brewer.pal(9, "Set3") 

dev.new(width=6,height=4,noRStudioGD = TRUE)
pie(mytable, labels = lbls,col=coul, cex = 1.3,
   main="")

Compare temperament scores across largest ethnicity groups

race_data<-complete[complete$Ethnicity=='White or Caucasian'|complete$Ethnicity=='Asian or Pacific Islander',]
table(race_data$Ethnicity)
## 
## Asian or Pacific Islander        White or Caucasian 
##                        76                        85
t.test(race_data$NA_score~race_data$Ethnicity)
## 
##  Welch Two Sample t-test
## 
## data:  race_data$NA_score by race_data$Ethnicity
## t = -0.74104, df = 154.39, p-value = 0.4598
## alternative hypothesis: true difference in means between group Asian or Pacific Islander and group White or Caucasian is not equal to 0
## 95 percent confidence interval:
##  -0.285519  0.129743
## sample estimates:
## mean in group Asian or Pacific Islander        mean in group White or Caucasian 
##                                4.385746                                4.463634
t.test(race_data$EX_score~race_data$Ethnicity)
## 
##  Welch Two Sample t-test
## 
## data:  race_data$EX_score by race_data$Ethnicity
## t = -0.98024, df = 158.98, p-value = 0.3285
## alternative hypothesis: true difference in means between group Asian or Pacific Islander and group White or Caucasian is not equal to 0
## 95 percent confidence interval:
##  -0.3872468  0.1303502
## sample estimates:
## mean in group Asian or Pacific Islander        mean in group White or Caucasian 
##                                4.930390                                5.058838
t.test(race_data$EC_score~race_data$Ethnicity)
## 
##  Welch Two Sample t-test
## 
## data:  race_data$EC_score by race_data$Ethnicity
## t = -1.44, df = 158.97, p-value = 0.1518
## alternative hypothesis: true difference in means between group Asian or Pacific Islander and group White or Caucasian is not equal to 0
## 95 percent confidence interval:
##  -0.41749644  0.06540148
## sample estimates:
## mean in group Asian or Pacific Islander        mean in group White or Caucasian 
##                                3.841596                                4.017644

Compare temperament scores across genders

gender_data<-complete[complete$Gender=='Female'|complete$Gender=='Male',]
table(gender_data$Gender)
## 
## Female   Male 
##    152     44
t.test(gender_data$NA_score~gender_data$Gender)
## 
##  Welch Two Sample t-test
## 
## data:  gender_data$NA_score by gender_data$Gender
## t = 2.9397, df = 69.768, p-value = 0.004452
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
##  0.1068884 0.5580029
## sample estimates:
## mean in group Female   mean in group Male 
##             4.516514             4.184068
t.test(gender_data$EX_score~gender_data$Gender)
## 
##  Welch Two Sample t-test
## 
## data:  gender_data$EX_score by gender_data$Gender
## t = 2.5035, df = 67.39, p-value = 0.01473
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
##  0.07483821 0.66323152
## sample estimates:
## mean in group Female   mean in group Male 
##             5.080312             4.711277
t.test(gender_data$EC_score~gender_data$Gender)
## 
##  Welch Two Sample t-test
## 
## data:  gender_data$EC_score by gender_data$Gender
## t = -0.3668, df = 62.045, p-value = 0.715
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
##  -0.3489803  0.2407652
## sample estimates:
## mean in group Female   mean in group Male 
##             3.933546             3.987654
tes<-res[c(res$block == '1'| res$block=='6'),c(1,2,16)]
tes<-reshape(tes, idvar = "UniqueID", timevar = "block", direction = "wide")
test<-merge(tes,complete, by='UniqueID')

new_complete<-test[,c(1:3, 13:24,29, 32:44)]

new_complete$early_strat[new_complete$winner.BIC.1 == 'Conjunctive Rule'] <-'CR'
new_complete$early_strat[new_complete$winner.BIC.1 == 'Information Integration'] <-'II'
new_complete$early_strat[new_complete$winner.BIC.1 == 'Length'] <-'1DR'
new_complete$early_strat[new_complete$winner.BIC.1 == 'Orientation'] <-'1DR'
new_complete$early_strat[new_complete$winner.BIC.1 == 'Fixed Random'] <-'Rand'

new_complete$final_strat[new_complete$winner.BIC.6 == 'Conjunctive Rule'] <-'CR'
new_complete$final_strat[new_complete$winner.BIC.6 == 'Information Integration'] <-'II'
new_complete$final_strat[new_complete$winner.BIC.6 == 'Length'] <-'1DR'
new_complete$final_strat[new_complete$winner.BIC.6 == 'Orientation'] <-'1DR'
new_complete$final_strat[new_complete$winner.BIC.6 == 'Fixed Random'] <-'Rand'


new_complete[, "max.block"] <- apply(new_complete[, 17:22], 1, max) #find best performing block performance

# remove bad participants
new_complete<-new_complete[!new_complete$max.block<0.6,] 
new_complete<-new_complete[!new_complete$exp_isCorrect<0.8,]
new_complete<-new_complete[!new_complete$intrusion_error>50,]
new_complete<-new_complete[!new_complete$numberWords_recalled>60,]
# nrow(complete)-nrow(new_complete) #total number participants removed
#this block of code create the dataframe for extracting best performing block for each participant and return the strategy being used in that block
long_strat1<-gather(new_complete[,c(1,5:7,13,17:22)], block, block_perf, block_1:block_6, factor_key = TRUE)
long_strat2<-gather(new_complete[,c(1,23:28)], block, block_strat, block_1_strat:block_6_strat, factor_key = TRUE)

long_strat1$block<- as.character(long_strat1$block)
long_strat2$block<- as.character(long_strat2$block)

long_strat2$block[long_strat2$block == "block_1_strat"] <- "block_1"
long_strat2$block[long_strat2$block == "block_2_strat"] <- "block_2"
long_strat2$block[long_strat2$block == "block_3_strat"] <- "block_3"
long_strat2$block[long_strat2$block == "block_4_strat"] <- "block_4"
long_strat2$block[long_strat2$block == "block_5_strat"] <- "block_5"
long_strat2$block[long_strat2$block == "block_6_strat"] <- "block_6"

long_strat<-merge(long_strat1,long_strat2,by=c('UniqueID','block'))

long_strat$block_strat[long_strat$block_strat=='Information Integration']<-'II'
long_strat$block_strat[long_strat$block_strat=='Conjunctive Rule']<-'CR'
long_strat$block_strat[long_strat$block_strat=='Orientation']<-'simple_rule'
long_strat$block_strat[long_strat$block_strat=='Length']<-'simple_rule'
long_strat$block_strat[long_strat$block_strat=='Fixed Random']<-'Random'

require(data.table) ## 1.9.2
group <- as.data.table(long_strat)
df_bestBlock_strat<-group[group[, .I[which.max(block_perf)], by=UniqueID]$V1]
# strategy multinomial logistic regression
require(foreign)
## Loading required package: foreign
require(nnet)
require(ggplot2)
require(reshape2)
## Loading required package: reshape2
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:reshape':
## 
##     colsplit, melt, recast
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(mlogit)
## Loading required package: dfidx
## 
## Attaching package: 'dfidx'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate
## The following object is masked from 'package:stats':
## 
##     filter
# df_bestBlock_strat <-df_bestBlock_strat[!df_bestBlock_strat$block_strat=='Random',]
# 
# df_bestBlock_strat$block_strat <- relevel(factor(df_bestBlock_strat$block_strat), ref = "simple_rule")
# test <- multinom(block_strat ~ NA_score + EC_score + EX_score + numberWords_recalled, data = df_bestBlock_strat)


ggplot(new_complete, aes(x = NA_score, y = block_1, color = early_strat)) +
  geom_point(size = 4)+ xlab('Negative Affect') + ylab('First Block Performance') + theme_classic()+theme(
        legend.position = "none",
        text = element_text(color = "black"), 
        axis.text = element_text(colour = "black"), 
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size=20),
        axis.title = element_text(size=20)
        )+ theme(legend.position="none")

ggplot(new_complete, aes(x = EC_score, y = block_1, color = early_strat)) +
  geom_point(size = 4)+ xlab('Effortful Control') + ylab(' ') + theme_classic()+ theme_classic()+theme(
        legend.position = "none",
        text = element_text(color = "black"), 
        axis.text = element_text(colour = "black"), 
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size=20),
        axis.title = element_text(size=20)
        )+ theme(legend.position="none")

ggplot(new_complete, aes(x = EX_score, y = block_1, color = early_strat)) +
  geom_point(size = 4)+ xlab('Extraversion') + ylab('First Block Performance') +  theme_classic()+theme(
        legend.position = "none",
        text = element_text(color = "black"), 
        axis.text = element_text(colour = "black"), 
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size=20),
        axis.title = element_text(size=20)
        )+ theme(legend.position="none")

ggplot(new_complete, aes(x = numberWords_recalled, y = block_1, color = early_strat)) +
  geom_point(size = 4)+ xlab('Working Memory') + ylab(' ') +  theme_classic()+theme(
        legend.position = "none",
        text = element_text(color = "black"), 
        axis.text = element_text(colour = "black"), 
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size=12),
        axis.title = element_text(size=20)
        )+ theme(legend.position="none")

# ggplot(new_complete, aes(x = numberWords_recalled, y = block_1, color = early_strat)) +
#   geom_point(size = 4)+ xlab('working memory') + ylab('first block performance') + theme_classic()+ guides(color=guide_legend("Strategy")) 
require(foreign)
require(nnet)
require(ggplot2)
require(reshape2)

# df_bestBlock_strat <-df_bestBlock_strat[!df_bestBlock_strat$block_strat=='CR',]
# 
# df_bestBlock_strat$block_strat <- relevel(factor(df_bestBlock_strat$block_strat), ref = "Rand")
# test <- multinom(block_strat ~ NA_score + EC_score + EX_score + numberWords_recalled, data = df_bestBlock_strat)
# summary(test)

a<-new_complete
a$early_strat[a$early_strat=='CR']  <- "1DR" 
a$early_strat <- relevel(factor(a$early_strat), ref = "Rand")
test <- multinom(early_strat ~ NA_score + EC_score + EX_score + numberWords_recalled, data = a)
## # weights:  18 (10 variable)
## initial  value 194.454375 
## iter  10 value 118.474922
## iter  20 value 117.304467
## final  value 117.299470 
## converged
summary(test)
## Call:
## multinom(formula = early_strat ~ NA_score + EC_score + EX_score + 
##     numberWords_recalled, data = a)
## 
## Coefficients:
##     (Intercept)   NA_score   EC_score    EX_score numberWords_recalled
## 1DR   7.1274255 0.09280636 -0.2594731 -0.45576468          -0.03948785
## II    0.5062842 0.96490165 -0.3085350 -0.06056204          -0.05539255
## 
## Std. Errors:
##     (Intercept)  NA_score  EC_score  EX_score numberWords_recalled
## 1DR    5.091436 0.5222080 0.4429832 0.3789029           0.04071175
## II     6.061743 0.6302251 0.5214419 0.4453413           0.04692491
## 
## Residual Deviance: 234.5989 
## AIC: 254.5989
z <- summary(test)$coefficients/summary(test)$standard.errors
p <- (1 - pnorm(abs(z), 0, 1)) * 2
p
##     (Intercept)  NA_score  EC_score EX_score numberWords_recalled
## 1DR   0.1615477 0.8589436 0.5580501 0.229033            0.3320776
## II    0.9334371 0.1257588 0.5540543 0.891829            0.2378210
table(new_complete$early_strat)
## 
##  1DR   CR   II Rand 
##  124   11   28   14

descriptives of the predictors

setnames(new_complete, old = c('NA_score','EC_score','EX_score','numberWords_recalled'), new = c('negative_affect','effortful_control','extraversion','Rspan_score'))

summary(new_complete[,c("negative_affect","extraversion","effortful_control","Rspan_score")])
##  negative_affect  extraversion   effortful_control  Rspan_score   
##  Min.   :2.520   Min.   :2.353   Min.   :2.000     Min.   :24.00  
##  1st Qu.:4.009   1st Qu.:4.529   1st Qu.:3.526     1st Qu.:40.00  
##  Median :4.401   Median :5.118   Median :4.000     Median :48.00  
##  Mean   :4.437   Mean   :4.988   Mean   :3.995     Mean   :46.23  
##  3rd Qu.:4.920   3rd Qu.:5.588   3rd Qu.:4.526     3rd Qu.:52.00  
##  Max.   :5.880   Max.   :6.706   Max.   :5.737     Max.   :60.00

histogram distribution of overall average performance

CR_overall_hist<-hist(new_complete$avg_cat_perf, col='yellow',main="",ylim=c(0,50),xlim=c(0.55,0.9),labels = T, xlab = 'Average CR Task Performance', cex.lab = 1.2)

# ggsave("CR_overall_hist.jpg", height = 4, width = 6, units = "in", dpi = 1000)

mean(new_complete$avg_cat_perf)
## [1] 0.7251099
sd(new_complete$avg_cat_perf)
## [1] 0.07240372

get descriptives for predictors

x<-new_complete[,c(5:7, 13)]
sapply(x, function(x) c( "Stand dev" = sd(x), 
                         "Mean"= mean(x,na.rm=TRUE),
                         "Median" = median(x),
                         "Minimum" = min(x),
                         "Maximun" = max(x),
                         "Upper Quantile" = quantile(x,1),
                         "LowerQuartile" = quantile(x,0)
                    )
)
##                     negative_affect effortful_control extraversion Rspan_score
## Stand dev                 0.6919895         0.7979255     0.867830    7.622097
## Mean                      4.4371133         3.9954955     4.987532   46.225989
## Median                    4.4008481         4.0000000     5.117647   48.000000
## Minimum                   2.5200000         2.0000000     2.352941   24.000000
## Maximun                   5.8800000         5.7368421     6.705882   60.000000
## Upper Quantile.100%       5.8800000         5.7368421     6.705882   60.000000
## LowerQuartile.0%          2.5200000         2.0000000     2.352941   24.000000

Check correlation matrix among predictors

df_for_tables<- new_complete

df.corr<- rcorr(as.matrix(df_for_tables[,c(5:7, 13)]))
df.corr$r<-round(df.corr$r,3)
df.corr$p<-round(df.corr$P, 5)
df.corr$r[upper.tri(df.corr$r)] <- "-"
df.corr$p[upper.tri(df.corr$r)] <- "-"
df.corr$p
##                   negative_affect effortful_control extraversion Rspan_score
## negative_affect   NA              "-"               "-"          "-"        
## effortful_control "0"             NA                "-"          "-"        
## extraversion      "0.00066"       "0.48109"         NA           "-"        
## Rspan_score       "0.82299"       "0.35669"         "0.47978"    NA

correlation table in R, not sig. level

library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# print tables using kable
kable(as.data.frame(format(df.corr$r, scientific = FALSE)), caption = "CR Task Predictors - Correlation: r values") %>%
  kable_styling(bootstrap_options = c("hover", "striped"), full_width = F) %>% 
  kable_classic()
CR Task Predictors - Correlation: r values
negative_affect effortful_control extraversion Rspan_score
negative_affect 1
effortful_control -0.502 1
extraversion -0.254 -0.053 1
Rspan_score -0.017 -0.07 0.053 1

Main multiple regression model

Full Model

mod1<-lm(avg_cat_perf~ negative_affect+extraversion+effortful_control+negative_affect*effortful_control+extraversion*effortful_control+Rspan_score, new_complete)
summary(mod1)
## 
## Call:
## lm(formula = avg_cat_perf ~ negative_affect + extraversion + 
##     effortful_control + negative_affect * effortful_control + 
##     extraversion * effortful_control + Rspan_score, data = new_complete)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.167994 -0.052148  0.000274  0.055715  0.140086 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)                        0.4076248  0.2584631   1.577    0.117
## negative_affect                    0.0612089  0.0394085   1.553    0.122
## extraversion                      -0.0095620  0.0298089  -0.321    0.749
## effortful_control                  0.0690935  0.0609123   1.134    0.258
## Rspan_score                        0.0010749  0.0007215   1.490    0.138
## negative_affect:effortful_control -0.0144466  0.0096084  -1.504    0.135
## extraversion:effortful_control     0.0010038  0.0072204   0.139    0.890
## 
## Residual standard error: 0.07234 on 170 degrees of freedom
## Multiple R-squared:  0.03588,    Adjusted R-squared:  0.001849 
## F-statistic: 1.054 on 6 and 170 DF,  p-value: 0.3922
# standardized beta
mod11<- lm(scale(avg_cat_perf)~scale(negative_affect)+scale(extraversion)+scale(effortful_control)+scale(negative_affect*effortful_control)+scale(extraversion*effortful_control)+scale(Rspan_score), new_complete)
summary(mod11)
## 
## Call:
## lm(formula = scale(avg_cat_perf) ~ scale(negative_affect) + scale(extraversion) + 
##     scale(effortful_control) + scale(negative_affect * effortful_control) + 
##     scale(extraversion * effortful_control) + scale(Rspan_score), 
##     data = new_complete)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.32023 -0.72023  0.00379  0.76950  1.93480 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                 4.417e-16  7.510e-02   0.000
## scale(negative_affect)                      5.850e-01  3.766e-01   1.553
## scale(extraversion)                        -1.146e-01  3.573e-01  -0.321
## scale(effortful_control)                    7.614e-01  6.713e-01   1.134
## scale(negative_affect * effortful_control) -6.578e-01  4.375e-01  -1.504
## scale(extraversion * effortful_control)     7.223e-02  5.196e-01   0.139
## scale(Rspan_score)                          1.132e-01  7.596e-02   1.490
##                                            Pr(>|t|)
## (Intercept)                                   1.000
## scale(negative_affect)                        0.122
## scale(extraversion)                           0.749
## scale(effortful_control)                      0.258
## scale(negative_affect * effortful_control)    0.135
## scale(extraversion * effortful_control)       0.890
## scale(Rspan_score)                            0.138
## 
## Residual standard error: 0.9991 on 170 degrees of freedom
## Multiple R-squared:  0.03588,    Adjusted R-squared:  0.001849 
## F-statistic: 1.054 on 6 and 170 DF,  p-value: 0.3922
library("lm.beta")
# lm.beta(mod1) #quick check standardized value 

Scatterplot of correlational relationship between each predictor and DV

library(tidyr)
predictor_corr_data<-gather(new_complete, predictor, Score, c(5:7,13), factor_key = TRUE)
reg_colours <- c("#414535", "#08605F", "#3F826D", "#858585", "#DDA448", "#F0803C", "#98410B" ) # green > orange

predictor_names <- list(
  'negative_affect'="Negative Affect",
  'extraversion'="Extraversion",
  'effortful_control'="Effortful Control",
  'Rspan_score'="Working Memory"
)

predictor_labeller <- function(variable,value){
  return(predictor_names[value])
}

predictor_scatter <-
  ggplot(predictor_corr_data[,c(25,29,30)], aes(y = avg_cat_perf, x = Score, colour = factor(predictor))) +
  geom_point(shape = 1) + 
  scale_color_manual(values = reg_colours[c(2,2,2,2)]) +
  facet_wrap(predictor ~ ., labeller=predictor_labeller, scales = "free_x")+
  # facet_wrap(~ predictor, ncol = 2, scales = "free_x") +
  geom_smooth(method = "lm") +
  xlab("Predictor Value")+
  ylab("Overall Average Performance") +
  # geom_line(intercept = 83.731, slopr = .5)+
  # add theme
  # font_size(offset.x = 10) +
  # font_size(base.theme = theme_classic()) +
  theme(text = element_text(size=20),
        axis.text.x = element_text(angle=0, hjust=2)) + 
  theme_classic() +
  theme(
        legend.position = "none",
        text = element_text(color = "black"), 
        axis.text = element_text(colour = "black"), 
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size=12),
        axis.title = element_text(size=12)
        )
## Warning: The labeller API has been updated. Labellers taking `variable` and
## `value` arguments are now deprecated. See labellers documentation.
# show fig
predictor_scatter  
## `geom_smooth()` using formula 'y ~ x'

Regression stats table in R

# create table depicting main stats to export later
  # use tidy() in broom package & DescTools::StdCoef()
library(broom)
library(DescTools)
## 
## Attaching package: 'DescTools'
## The following objects are masked from 'package:Hmisc':
## 
##     %nin%, Label, Mean, Quantile
## The following object is masked from 'package:data.table':
## 
##     %like%
mod1_coef <- cbind(tidy(mod1)[,1:3], 
                  tidy(mod11)[,2:3], 
                  tidy(mod1)[,4:5])
  # remove rownames
  rownames(mod1_coef) <- c(1:nrow(mod1_coef))
  # rename cols
  colnames(mod1_coef) <- c("term", "beta", "beta_SE", "std_beta", "std_beta_SE", "t_val", "p_val")

# finally, create df
mod1_t <- 
      rbind(mod1_coef,
      tibble("term" = c("f_stat", "R2/Adj.R2", "N"), 
             "beta" = c(summary(mod1)$fstatistic[2], summary(mod1)$r.squared, nrow(mod1$model)), 
             "beta_SE" = c(summary(mod1)$fstatistic[3], summary(mod1)$adj.r.squared, NA), 
             "std_beta" = c(summary(mod1)$fstatistic[1], rep(NA, 2)), 
             "std_beta_SE" = c(pf(summary(mod1)$fstatistic[1], summary(mod1)$fstatistic[2], summary(mod1)$fstatistic[3], lower.tail = F), rep(NA, 2)), 
             "t_val" = rep(NA, 3), 
             "p_val" = rep(NA, 3)
             )
      )

# remove NA from kable tables
options(knitr.kable.NA = '') 

# show kable table
kable(mod1_t, caption = "Multiple Regression Model Predicting CR overall average performance", align = rep('lcccc'), digits = 3, row.names = T, col.names = c("", "Beta", "Beta's SE", "Std. Beta", "Std. Beta's SE",  "t-value", "p-value")) %>% 
  kable_paper(full_width = F) %>%
  # add_header_above(c(" ", "mturk" = 2, "Mturk" = 2)) %>%
  # pack_rows("Criterion(s)", 1, 6) %>%
  # # column_spec(1, width = "10em") %>% 
  # pack_rows("Predictor(s)", 7, 19) %>%
  footnote(general = "...") %>% 
  column_spec(c(4), border_left = T) %>% 
  kable_classic()
Multiple Regression Model Predicting CR overall average performance
Beta Beta’s SE Std. Beta Std. Beta’s SE t-value p-value
1 (Intercept) 0.408 0.258 0.000 0.075 1.577 0.117
2 negative_affect 0.061 0.039 0.585 0.377 1.553 0.122
3 extraversion -0.010 0.030 -0.115 0.357 -0.321 0.749
4 effortful_control 0.069 0.061 0.761 0.671 1.134 0.258
5 Rspan_score 0.001 0.001 -0.658 0.438 1.490 0.138
6 negative_affect:effortful_control -0.014 0.010 0.072 0.520 -1.504 0.135
7 extraversion:effortful_control 0.001 0.007 0.113 0.076 0.139 0.890
8 f_stat 6.000 170.000 1.054 0.392
9 R2/Adj.R2 0.036 0.002
10 N 177.000
Note:
…
# write.csv(mod1_t,"db_CR_reg.csv", row.names = T) #always check row order across models

forest plot

# reset NA vals in kable tables to "NA"
options(knitr.kable.NA = NA) 

reg_colours <- c("#414535", "#08605F", "#3F826D", "#858585", "#DDA448", "#F0803C", "#98410B" ) # green > orange

# plot lm
  # plot standardized betas to compare cooefficients
mod1_reg_plot <- 
  sjPlot::plot_model(model = mod11, # lm output 
              title = "", # to remove title
                     # type = "std", # type "std" = standardized betas
                     #sort.est = T, # sort based on betas (highest on top)
                     wrap.title = 55, # char length of each title line
                     #axis.lim = c(-.25,.25)
                     # grid.breaks = .25, # x-axis breaks 
                     #se = T # use se instead of CI
                     colors = reg_colours,
                     show.values = T, # show betas
                     show.p = T, # show * for sig p
                     p.threshold = c(.049, .009, .0009), # state up to 3 cut-offs for pvals, the number is inclusive**
                     value.offset = 0.4, # offset to beta labels
                     value.size = 3, # size of beta labels
                     digits = 3, # num decimals in beta labels
                     dot.size = 2.5, line.size = 1, 
                     vline.color = "#F98B99",
                     # group.terms = c(1, 2, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7),
                     axis.labels = c("Working Memory", "Extraversion x Effortful Control", "Negative Affect x Effortful Control", "Effortful Control", "Extraversion", "Negative Affect"), 
                     axis.title = "", 
                     width = 0.2, transform = NULL
                     ) +
    # font_size(offset.x = 10) +
    # font_size(base.theme = theme_classic()) +
    theme(text = element_text(color = "black"), 
          axis.text = element_text(colour = "black", size = 12), 
          axis.ticks.y = element_blank(),
          )
## Registered S3 method overwritten by 'parameters':
##   method                         from      
##   format.parameters_distribution datawizard
# show plot
mod1_reg_plot

Reduced model

mod1_reduce<-step(mod1)
mod11_reduce<-step(mod11)
summary(mod1_reduce)
summary(mod11_reduce)
mod1_reduce_coef <- cbind(tidy(mod1_reduce)[,1:3], 
                  tidy(mod11_reduce)[,2:3], 
                  tidy(mod1_reduce)[,4:5])
  # remove rownames
  rownames(mod1_reduce_coef) <- c(1:nrow(mod1_reduce_coef))
  # rename cols
  colnames(mod1_reduce_coef) <- c("term", "beta", "beta_SE", "std_beta", "std_beta_SE", "t_val", "p_val")

# finally, create df
mod1_reduce_t <- 
      rbind(mod1_reduce_coef,
      tibble("term" = c("f_stat", "R2/Adj.R2", "N"), 
             "beta" = c(summary(mod1_reduce)$fstatistic[2], summary(mod1_reduce)$r.squared, nrow(mod1_reduce$model)), 
             "beta_SE" = c(summary(mod1_reduce)$fstatistic[3], summary(mod1_reduce)$adj.r.squared, NA), 
             "std_beta" = c(summary(mod1_reduce)$fstatistic[1], rep(NA, 2)), 
             "std_beta_SE" = c(pf(summary(mod1_reduce)$fstatistic[1], summary(mod1_reduce)$fstatistic[2], summary(mod1_reduce)$fstatistic[3], lower.tail = F), rep(NA, 2)), 
             "t_val" = rep(NA, 3), 
             "p_val" = rep(NA, 3)
             )
      )

# remove NA from kable tables
options(knitr.kable.NA = '') 

# show kable table
kable(mod1_reduce_t, caption = "Reduced multiple Regression Model Predicting CR overall average performance", align = rep('lcccc'), digits = 3, row.names = T, col.names = c("", "Beta", "Beta's SE", "Std. Beta", "Std. Beta's SE",  "t-value", "p-value")) %>% 
  kable_paper(full_width = F) %>%
  # add_header_above(c(" ", "mturk" = 2, "Mturk" = 2)) %>%
  # pack_rows("Criterion(s)", 1, 6) %>%
  # # column_spec(1, width = "10em") %>% 
  # pack_rows("Predictor(s)", 7, 19) %>%
  footnote(general = "...") %>% 
  column_spec(c(4), border_left = T) %>% 
  kable_classic()
Reduced multiple Regression Model Predicting CR overall average performance
Beta Beta’s SE Std. Beta Std. Beta’s SE t-value p-value
1 (Intercept) 0.352 0.189 0.000 0.075 1.866 0.064
2 negative_affect 0.062 0.039 0.593 0.371 1.598 0.112
3 effortful_control 0.074 0.043 0.811 0.472 1.720 0.087
4 Rspan_score 0.001 0.001 -0.639 0.427 1.476 0.142
5 negative_affect:effortful_control -0.014 0.009 0.111 0.075 -1.497 0.136
6 f_stat 4.000 172.000 1.416 0.231
7 R2/Adj.R2 0.032 0.009
8 N 177.000
Note:
…
# write.csv(mod1_reduce_t,"db_CR_reg_reduce.csv", row.names = T) be careful the row orders are different across models, need to change in final csv file

Reducted model results

summary(mod11_reduce)
## 
## Call:
## lm(formula = scale(avg_cat_perf) ~ scale(negative_affect) + scale(effortful_control) + 
##     scale(negative_affect * effortful_control) + scale(Rspan_score), 
##     data = new_complete)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.30563 -0.72232 -0.01586  0.75801  1.92121 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                 4.905e-16  7.481e-02   0.000
## scale(negative_affect)                      5.925e-01  3.708e-01   1.598
## scale(effortful_control)                    8.109e-01  4.716e-01   1.720
## scale(negative_affect * effortful_control) -6.392e-01  4.269e-01  -1.497
## scale(Rspan_score)                          1.114e-01  7.549e-02   1.476
##                                            Pr(>|t|)  
## (Intercept)                                  1.0000  
## scale(negative_affect)                       0.1118  
## scale(effortful_control)                     0.0873 .
## scale(negative_affect * effortful_control)   0.1362  
## scale(Rspan_score)                           0.1419  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9953 on 172 degrees of freedom
## Multiple R-squared:  0.03189,    Adjusted R-squared:  0.009375 
## F-statistic: 1.416 on 4 and 172 DF,  p-value: 0.2305
lm.beta(mod1_reduce)
## 
## Call:
## lm(formula = avg_cat_perf ~ negative_affect + effortful_control + 
##     Rspan_score + negative_affect:effortful_control, data = new_complete)
## 
## Standardized Coefficients::
##                       (Intercept)                   negative_affect 
##                                NA                         0.5925295 
##                 effortful_control                       Rspan_score 
##                         0.8109380                         0.1114048 
## negative_affect:effortful_control 
##                        -0.6391542
options(knitr.kable.NA = NA) 

reg_colours <- c("#414535", "#08605F", "#3F826D", "#858585", "#DDA448", "#F0803C", "#98410B" ) # green > orange

# plot lm
  # plot standardized betas to compare cooefficients
mod1_reg_plot <- 
  sjPlot::plot_model(model = mod11_reduce, # lm output 
              title = "", # to remove title
                     # type = "std", # type "std" = standardized betas
                     #sort.est = T, # sort based on betas (highest on top)
                     wrap.title = 55, # char length of each title line
                     #axis.lim = c(-.25,.25)
                     # grid.breaks = .25, # x-axis breaks 
                     #se = T # use se instead of CI
                     colors = reg_colours,
                     show.values = T, # show betas
                     show.p = T, # show * for sig p
                     p.threshold = c(.049, .009, .0009), # state up to 3 cut-offs for pvals, the number is inclusive**
                     value.offset = 0.4, # offset to beta labels
                     value.size = 3, # size of beta labels
                     digits = 3, # num decimals in beta labels
                     dot.size = 2.5, line.size = 1, 
                     vline.color = "#F98B99",
                     # group.terms = c(1, 2, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7),
                     axis.labels = c("Working Memory", "Negative Affect x Effortful Control", "Effortful Control", "Negative Affect"), 
                     axis.title = "", 
                     width = 0.2, transform = NULL
                     ) +
    # font_size(offset.x = 10) +
    # font_size(base.theme = theme_classic()) +
    theme(text = element_text(color = "black"), 
          axis.text = element_text(colour = "black", size = 12), 
          axis.ticks.y = element_blank(),
          )

# show plot
mod1_reg_plot

mod1_reg_plot_1 <- 
  sjPlot::plot_model(model = mod1_reduce, # lm output 
              title = "", # to remove title
                     type = "std", # type "std" = standardized betas
                     #sort.est = T, # sort based on betas (highest on top)
                     # wrap.title = 55, # char length of each title line
                     #axis.lim = c(-.25,.25)
                     # grid.breaks = .25, # x-axis breaks 
                     #se = T # use se instead of CI
                     colors = reg_colours,
                     show.values = T, # show betas
                     show.p = T, # show * for sig p
                     p.threshold = c(.049, .009, .0009), # state up to 3 cut-offs for pvals, the number is inclusive**
                     value.offset = 0.4, # offset to beta labels
                     value.size = 3, # size of beta labels
                     digits = 3, # num decimals in beta labels
                     dot.size = 2.5, line.size = 1, 
                     vline.color = "#F98B99",
                     # group.terms = c(1, 2, 3, 3, 3, 3, 3, 4, 5, 6, 7, 7, 7),
                     axis.labels = c("Working Memory", "Negative Affect x Effortful Control", "Effortful Control", "Negative Affect"), 
                     axis.title = "", 
                     width = 0.2, transform = NULL
                     ) +
    # font_size(offset.x = 10) +
    # font_size(base.theme = theme_classic()) +
    theme(text = element_text(color = "black"), 
          axis.text = element_text(colour = "black", size = 12), 
          axis.ticks.y = element_blank(),
          )

# show plot
mod1_reg_plot_1

Block performance

Boxplot of block performance distribution with line connecting mean

# boxplot of block performance
library(tidyr)
d_long<-gather(new_complete, block, block_perf, block_1:block_6, factor_key = TRUE)

library(stringr)

df_mean <- d_long %>% 
  group_by(block) %>% 
  dplyr::summarize(average = mean(block_perf)) %>%
  ungroup()

df_mean22<-df_mean %>%
   dplyr::mutate(block = str_extract(block, "[^_]+$"))

d_long22<-d_long %>%
   dplyr::mutate(block = str_extract(block, "[^_]+$"))

geom_path(data = d_long22, aes(x = x,y = y), inherit.aes = FALSE )
## mapping: x = ~x, y = ~y 
## geom_path: lineend = butt, linejoin = round, linemitre = 10, arrow = NULL, na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
ggplot(d_long22, aes(x = block, y = block_perf)) + ylab('Proportion Correct') + xlab('Block')+
geom_boxplot(fill='gray85', color="gray2") + theme_classic() + ylim(0.4, 1.0) +
  stat_boxplot(geom='errorbar', width=0.3) + scale_y_continuous(breaks = seq(0.4, 1.0, by = 0.1)) +
geom_point(data = df_mean22, size = 2,
             mapping = aes(x = block, y = average),
             color="black") +
geom_line(data = df_mean22, size=1,
            mapping = aes(x = block, y = average, group=1), color = 'gray10')+
theme(axis.text=element_text(size=12),
        axis.title=element_text(size=16))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

Tukey plot comparing every 2 blocks

block_m<-aov(block_perf~block, d_long)
summary(block_m)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## block          5  0.705 0.14095   14.35 1.28e-13 ***
## Residuals   1056 10.370 0.00982                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# library(lme4)
# library(lmerTest)
# block_m1<-lmer(block_perf~block+(1|UniqueID), d_long)
# summary(block_m1)
tuk<-TukeyHSD(block_m)

d_long$block<- as.character(d_long$block)


tuk_plot <- function (x, xlab, ylab, ylabels = NULL, ...) {
  for (i in seq_along(x)) {
    xi <- x[[i]][, -4L, drop = FALSE]
    yvals <- nrow(xi):1L
    dev.hold()
    on.exit(dev.flush())
    plot(c(xi[, "lwr"], xi[, "upr"]), rep.int(yvals, 2L), 
         type = "n", axes = FALSE, xlab = "", ylab = "", main = NULL, 
         ...)
    axis(1, ...)
    # change for custom axis labels
    if (is.null(ylabels)) ylabels <- dimnames(xi)[[1L]]

    axis(2, at = nrow(xi):1, labels = ylabels, 
         srt = 0, ...)
    abline(h = yvals, lty = 1, lwd = 1, col = "lightgray")
    abline(v = 0, lty = 2, lwd = 1, ...)
    segments(xi[, "lwr"], yvals, xi[, "upr"], yvals, ...)
    segments(as.vector(xi), rep.int(yvals - 0.1, 3L), as.vector(xi), 
             rep.int(yvals + 0.1, 3L), ...)
    title(main ="", 
          # change for custom axis titles
          xlab = xlab, ylab = ylab)

    box()
    dev.flush()
    on.exit()
  }
}


with(par(mai=c(0.7,0.8,0.5,0.5), cex.lab=1.2, cex.axis=1),{tuk_plot(tuk,  " ", "Block",c("2-1","3-1","4-1","5-1","6-1","3-2","4-2","5-2","6-2","4-3","5-3","6-3","5-4","6-4","6-5"),las=2)})