library(tidyverse)
library(zoo)
library(knitr)
library(tibble)
library(plyr)
library(dplyr)
library(data.table)
library(Hmisc)
library(grt)
library(reshape) #reshape data from long to wide
library(MASS) #backward elimination of non-significant predictors in linear models
library(nnet) #multinomial logistic regression
#need this to work on Mac
logLik.grg <- function(object, ...)
{
val <- object$logLik
class(val) <- "logLik"
val
}
df <- read.csv("qualtrics_pilot.csv", header=T)
df<-df %>% drop_na("exist_in_Pav") #drop people who did not complete Pavlovia part
df<-df %>% mutate_all(~ifelse(is.na(.x), mean(.x, na.rm = TRUE), .x)) #fill na with col average
# which(colnames(df)=="NA.1_1") #get column number to extract
# which(colnames(df)=="EX.77R_1")
cols <- c(1, 5:12, 18:79)
df<-df[,cols] #extract only ATQ columns
cols.num <- c(10:71) #find ATQ columns
df[cols.num] <- sapply(df[cols.num],as.numeric) #convert to numerics
#sapply(df, class)
names(df)[2:9] <- c("Ethnicity", "Age", "Gender", "Education_Completed","Country_origin","Country_residence", "Disability_status","Disability_spec")
indx <- grepl('R', colnames(df)) #find reverse coded items
df[,indx]<-abs(df[,indx]-8) #reverse coding
NA_indx <- grepl('NA', colnames(df)) #find Neg_Affect items
NA_sum <- rowSums(df[, NA_indx]) #total score of NA items for each participant
df$NA_score <- NA_sum/25 #append average NA scores to df
EC_indx <- grepl('EC', colnames(df)) #find EC items
EC_sum <- rowSums(df[, EC_indx]) #total score of EC items for each participant
df$EC_score <- EC_sum/19 #append EC scores to df
EX_indx <- grepl('EX', colnames(df)) #find EX items
EX_sum <- rowSums(df[, EX_indx]) #total score of EX items for each participant
df$EX_score <- EX_sum/17 #append EX scores to df
#subj 3302ma completed 11 blocks out of 12, will be removed
df <- df %>% filter(!UniqueID == "3302ma")
df_temp <-df[,c(1:9, 72:74)]
setwd( "/Users/tzhu9/Desktop/pilotData_analysis/pilot_Pavlovia_data/completed_pilot_data")
csv <- list.files(full.names = TRUE, pattern = '*.csv')
df_Pav <- as_tibble(rbind.fill(lapply (csv, fread)))
#subset dataset
# which(colnames(df_Pav)=="exp_Sentence")
# which(colnames(df_Pav)=="catlearn_corrAns")
cols1 <- c(1, 85:132)
df_Pav<-df_Pav[,cols1]
# which(colnames(df_Pav)=="exp_typedWord")
# which(colnames(df_Pav)=="numberWords_recalled")
# which(colnames(df_Pav)=="key_resp_20.keys")
# which(colnames(df_Pav)=="stim_file")
cols2 <- c(1:6,12,20:21,31:33,40:47, 49)
df_Pav<-df_Pav[,cols2]
#fill empty cells with NA
df_Pav <- df_Pav %>%
mutate_at(vars(colnames(.)),
.funs = funs(ifelse(.=="", NA, as.character(.))))
#remove empty rows
df_Pav<-df_Pav[!is.na(df_Pav$exp_Sentence) | !is.na(df_Pav$exp_typedWord) | !is.na(df_Pav$key_resp_20.keys),]
#which(colnames(df_Pav)=="sentence_num_total")
df_rspan<-df_Pav[, 1:9] #subset relevant cols
df_rspan<-df_rspan[!is.na(df_rspan$exp_Sentence) | !is.na(df_rspan$exp_typedWord),] #keep rows with either judgment or recall responses
cols4 <- c(4, 6, 8:9)
df_rspan[cols4] <- sapply(df_rspan[cols4],as.numeric)
#keep only judgment related rows
accuracy <- df_rspan %>%
filter(!is.na(exp_isCorrect))
#late responses on judgment are filled with rt of 4.5
accuracy$key_resp_10.rt[is.na(accuracy$key_resp_10.rt)] <- 4.5
#keep only recalled_word related rows
recall_num <- df_rspan %>%
filter(!is.na(numberWords_recalled))
#accuracy dataframe
df1<-aggregate(accuracy[, c(4,6)], list(accuracy$UniqueID), mean)
#recalled number dataframe
df2<-aggregate(recall_num[, 8], list(recall_num$UniqueID), sum)
df_rspan_perf <- merge(df1,df2,by="Group.1")
names(df_rspan_perf)[1] <- "UniqueID"
head(df_rspan_perf)
df_tem_rspan<-merge(df_temp, df_rspan_perf, by = "UniqueID")
kable(head(df_tem_rspan))
| UniqueID | Ethnicity | Age | Gender | Education_Completed | Country_origin | Country_residence | Disability_status | Disability_spec | NA_score | EC_score | EX_score | exp_isCorrect | key_resp_10.rt | numberWords_recalled |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0202ch | White or Caucasian | 18 | Male | High school | Canada | Canada | No | 3.56 | 5.473684 | 4.823529 | 0.9833333 | 1.519284 | 48 | |
| 0602ly | Asian or Pacific Islander | 18 | Male | High school | Pakistan | Canada | No | 3.64 | 3.736842 | 6.117647 | 0.9333333 | 2.737075 | 42 | |
| 0902be | Asian or Pacific Islander | 18 | Male | High school | China | Canada | No | 4.28 | 4.157895 | 4.235294 | 0.9000000 | 1.172533 | 35 | |
| 1001re | Not listed here | 19 | Female | High school | Canada | Canada | No | 4.68 | 3.105263 | 4.529412 | 0.9000000 | 1.774508 | 44 | |
| 1502ha | South Asian | 18 | Male | High school | Sri Lanka | Canada | No | 4.00 | 5.210526 | 3.823529 | 0.9500000 | 2.029060 | 55 | |
| 2202li | White or Caucasian | 18 | Female | High school | Canada | Canada | No | 4.56 | 4.526316 | 4.588235 | 0.9500000 | 1.197451 | 41 |
cols3<- c(1, 10:21)
df_catlearn<-df_Pav[, cols3] #remove irrelevant cols
df_catlearn<-df_catlearn[!is.na(df_catlearn$key_resp_20.keys),] #remove empty rows
df_catlearn <- df_catlearn %>% filter(!UniqueID == "3302ma")#incomplete catlearn data
df_catlearn$block <- rep(1:12, times=25, each = 30)
df_catlearn<- df_catlearn[, c(1, 14, 2:13)]
#describe(df_catlearn)
df_catlearn <-(df_catlearn %>% mutate(catlearn_corrAns = case_when(
catlearn_corrAns == "a" ~ 1,
catlearn_corrAns == "b" ~ 2))
)
#relabel the key_resp_3.keys as 1 and 2
df_catlearn<-(df_catlearn %>% mutate(key_resp_20.keys = case_when(
key_resp_20.keys == "a" ~ 1,
key_resp_20.keys == "b" ~ 2))
)
cols5<-c(4, 5, 7:13)
df_catlearn[cols5] <- sapply(df_catlearn[cols5],as.numeric)
dat<-df_catlearn
# #template code for combining blocks
# # cols6<-c(1, 3:14)
# # dat<-df_catlearn[, cols6]
# # dat$block <-rep(1:6, times = 24, each=60)
# # source("plot.gcjc.R")
#
blocks <- levels(factor(dat$block))
sbj <- levels(factor(dat$UniqueID))
models <- c("Orientation", "Length", "Information Integration", "Conjunctive Rule", "Fixed Random", "General Random")
res <- matrix(nrow=length(sbj) * length(blocks),ncol=(length(models) * 2 + 2))
res <- cbind(expand.grid(sbj,blocks), res)
colnames(res)[1:2] <- c("UniqueID","block")
pdf(file="25ss_individual_plots.pdf", width=12, height=9)
op <- par(mfrow = c(3, 4), pty = "m")
for(i in sbj)
{
for(j in blocks)
{
data <- subset(dat, dat$block == j & dat$UniqueID == i)
fit.ori <- glc(key_resp_20.keys ~ ori_transform, category=data$catlearn_corrAns, data=data)
fit.length <- glc(key_resp_20.keys ~ length, category=data$catlearn_corrAns, data=data)
fit.2d <- glc(key_resp_20.keys ~ length + ori_transform, category=data$catlearn_corrAns, data=data)
fit.cj <- gcjc(key_resp_20.keys ~ length + ori_transform,
category=data$catlearn_corrAns, data=data,
equal.noise = TRUE, config = 2,
fixed = list(noise = c(TRUE, FALSE), bias = c(FALSE, FALSE)))
fit.frg <- grg(data$catlearn_corrAns, fixed=T)
fit.grg <- grg(data$catlearn_corrAns, fixed=F)
AICs <- AIC(fit.ori, fit.length, fit.2d, fit.cj, fit.frg, fit.grg)
BICs <- AIC(fit.ori, fit.length, fit.2d, fit.cj, fit.frg, fit.grg, k=log(80))
winner <- which.min(AICs$AIC)
winner2 <- which.min(BICs$AIC)
res[res$UniqueID==i & res$block == j,3:ncol(res)] <- c(round(AICs$AIC,2), models[winner], round(BICs$AIC,2), models[winner2])
##### now plot'em ###RACHEL CHANGED 'winner' to 'winner2' to reflect BIC instead of AIC
col <- rep("black",4)
col[winner2] <- "red"
bg <- c("blue","red")[factor(data$key_resp_20.keys)]
pch <- c(21,24)[factor(data$catlearn_corrAns)]
title <- paste(i, ": block", j, ": ", models[winner2])
plot(ori_transform ~ length, data=data, xlab="Length", ylab="Orientation", bg=bg, pch=pch, xlim=c(0,300), ylim=c(0, 110), main=title)
abline(h=coef(fit.ori), col = col[1])
abline(v=coef(fit.length), col = col[2])
abline(coef(fit.2d), col = col[3])
cj.coef <- coef(fit.cj)
abline(h=cj.coef[["ori_transform"]], col = col[4], lty = 3)
abline(v=cj.coef[["length"]], col = col[4], lty = 3)
}
}
## Warning in glc(key_resp_20.keys ~ length + ori_transform, category = data$catlearn_corrAns, : nlminb problem, convergence error code = 1
## message = iteration limit reached without convergence (10)
dev.off()
## quartz_off_screen
## 2
res.nam <- c("ori","length","2d","cj","frg","grg","winner")
colnames(res)[3:16] <- c(paste(res.nam, "AIC", sep="."), paste(res.nam, "BIC", sep="."))
catlearn_perf<- dat %>%
group_by(UniqueID, block) %>%
dplyr::summarize(Mean = mean(key_resp_20.corr, na.rm=TRUE))
head(catlearn_perf)
wide1<-cast(catlearn_perf, UniqueID~block) #catlearn_perf long to wide for later merging
# count the number of time each model is used by each participant
model_block <- res %>%
group_by(UniqueID, winner.BIC) %>%
dplyr::summarise(Count = n())
#how many blocks for each Ss was best fitted by CR model
head(model_block[with(model_block, winner.BIC == "Conjunctive Rule" ), ])
# find the first block each S used the optimal CR strategy
first_cj<- res %>%
group_by(UniqueID, block, winner.BIC) %>%
dplyr::summarise(Count = n())
a<-first_cj[first_cj$winner.BIC == "Conjunctive Rule",]
first<-a %>%
group_by(UniqueID) %>%
slice(which.min(block))
head(first)
#merge df_tem_rspan that has temperament scores and rspan performance with catlearn model fit descriptives
b<-merge(first, df_tem_rspan, all = TRUE)
cols<-c(1:2, 5:18)
b<-b[, cols]
colnames(b)[2] <- "first_CR_block"
#reshape model_block from long to wide, this df contains number of blocks best fitted by each strategy
wide<-cast(model_block, UniqueID ~ winner.BIC)
b<-merge(wide,b,all =TRUE)
#fill NAs with 0 in the block count for each model
b[is.na(b)] <- 0
complete<-setnames(b, old = c('Conjunctive Rule','Fixed Random','Information Integration', 'Length','Orientation'), new = c('total_CR','total_Rand','total_II','total_length','total_ori'))
# generate a column that keep count of the total number of blocks best fitted by rule-based models, regardless of 1d or 2d
complete$total_rule<-complete$total_CR+complete$total_length + complete$total_ori
# merge with catlearn accuracy performance of each block
complete<-merge(complete, wide1, by = "UniqueID")
# remove Ss based on performance - more than 3 blocks < 60% accuracy
dt<-complete[,23:34]
complete$num_low_perf<-rowSums(dt < 0.60)
complete$avg_cat_perf <- rowSums(dt)/12
complete<-complete[!complete$num_low_perf > 6,]
#generate columns that take temperament factors as low/medium/high levels for potential analyses
complete<-complete %>% mutate(NA_level = cut(NA_score, quantile(NA_score, c(0, 0.5, 1)), labels = c('Low', 'High'), include.lowest = TRUE))
complete<-complete %>% mutate(EC_level = cut(EC_score, quantile(EC_score, c(0, 0.5, 1)), labels = c('Low', 'High'), include.lowest = TRUE))
complete<-complete %>% mutate(EX_level = cut(EX_score, quantile(EX_score, c(0, 0.5, 1)), labels = c('Low', 'High'), include.lowest = TRUE))
cols2<-c(1,8:18, 37:39, 19:21, 23:34, 2:7, 22)
complete<-complete[,cols2]
complete<-setnames(complete, old = c('1','2','3','4', '5','6','7','8','9','10',"11","12"), new = c('block_1','block_2','block_3','block_4', 'block_5','block_6','block_7','block_8','block_9','block_10','block_11','block_12'))
head(complete)
#find the first block of non-random response as use it as default strategy
winnerBIC<-res[,c(1, 2, 16)]
default_strat<-cast(winnerBIC, UniqueID ~ block)
default_strat<-setnames(default_strat, old = c('1','2','3','4', '5','6','7','8','9','10',"11","12"), new = c('block_1','block_2','block_3','block_4', 'block_5','block_6','block_7','block_8','block_9','block_10','block_11','block_12'))
# find the first non-random block and tracks its value and block number
func <- function(..., val = "Fixed Random") {
dat <- unlist(list(...))
ind <- which.max(dat != val)
list(dat[[ind]], ind)
}
new<-setNames(
do.call(rbind.data.frame, do.call(Map, c(list(f=func), default_strat[,-1]))),
c("default_strat", "first_nonRand_block"))
default_strat <- cbind(new$default_strat, new$first_nonRand_block, default_strat)
default_strat<-setnames(default_strat, old = c("new$default_strat", "new$first_nonRand_block"), new = c("default_strat", "first_nonRand_block"))
default_strat$default_strat <- ifelse(default_strat$default_strat=="Length", "1",
ifelse(default_strat$default_strat=="Orientation", "1",
ifelse(default_strat$default_strat=="Conjunctive Rule", "1",
ifelse(default_strat$default_strat=="Information Integration", "0",
NA ))))
default_strat$default_strat<-as.numeric(default_strat$default_strat)
default_strat<-merge(default_strat[,1:3], complete, by="UniqueID")
default_strat<-default_strat[, c(1, 4:39, 2:3)]
head(default_strat)
Only 3 people used II as default strategy out of the 23 Ss, this analysis can only be done when more data is available.
#model did not converge with very few IIs in the column
# fit_first_nonrand <-glm(default_strat ~ EX_score + NA_score + EC_score+ key_resp_10.rt + numberWords_recalled + exp_isCorrect, default_strat, family="binomial")
# look at at which block Ss started using the optimal strategy. In other words, their learning rate.
pot_df<-default_strat[!is.na(default_strat[, 36]), ] #removes Ss who never used CR
pot_df[,c(10:12,36)] <- sapply(pot_df[,c(10:12,36)], as.numeric)
pot_fit <- lm(first_CR_block ~ EC_score + EX_score + NA_score + key_resp_10.rt + exp_isCorrect + numberWords_recalled, data = pot_df)
step_pot<-step(pot_fit)
summary(step_pot)
##
## Call:
## lm(formula = first_CR_block ~ EC_score + numberWords_recalled,
## data = pot_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4347 -1.4709 -0.0765 1.4958 3.5948
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.60359 3.38440 1.360 0.19261
## EC_score -1.89674 0.68024 -2.788 0.01315 *
## numberWords_recalled 0.22093 0.06841 3.229 0.00524 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.054 on 16 degrees of freedom
## Multiple R-squared: 0.4629, Adjusted R-squared: 0.3958
## F-statistic: 6.896 on 2 and 16 DF, p-value: 0.006921
Effortful control significantly predicted learning rate, higher EC is associated with faster learning of the optimal strategy. WMC also significantly predicted learning rate, interestingly, results seem to suggest that higher number of words recalled is associated with slower learning rate.
#create a column that tracks whether participants ever used optimal strategy in any of the 12 blocks
default_strat$first_CR_block<-as.numeric(default_strat$first_CR_block)
default_strat$ifOpti[is.na(default_strat$first_CR_block)] <- 0
default_strat$ifOpti[default_strat$first_CR_block > 0] <-1
default_strat$ifOpti <- as.factor(default_strat$ifOpti)
m <-glm(ifOpti ~ EX_score + NA_score + EC_score+ key_resp_10.rt + numberWords_recalled + exp_isCorrect, default_strat, family="binomial")
step_m <- stepAIC(m, direction="both")
summary(step_m)
##
## Call:
## glm(formula = ifOpti ~ NA_score, family = "binomial", data = default_strat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7229 0.2454 0.3756 0.6276 1.1619
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 12.041 7.672 1.570 0.117
## NA_score -2.480 1.764 -1.406 0.160
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 21.254 on 22 degrees of freedom
## Residual deviance: 18.621 on 21 degrees of freedom
## AIC: 22.621
##
## Number of Fisher Scoring iterations: 5
No predictor significantly predicted the ability to discover optimal strategy or not, although the model chose to keep NA_score. Current data suggests the trend is that increase in Negative Affect is associated with decreases in the log odds of discovering the optimal strategy.
m3<-lm(total_CR ~ EX_score + NA_score + EC_score+ key_resp_10.rt + numberWords_recalled + exp_isCorrect, default_strat)
step_m3<-step(m3)
summary(step_m3)
##
## Call:
## lm(formula = total_CR ~ NA_score + EC_score + key_resp_10.rt +
## numberWords_recalled, data = default_strat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6302 -1.1980 0.4832 0.9290 1.9637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.66779 4.76838 0.350 0.73058
## NA_score -1.05711 0.70462 -1.500 0.15088
## EC_score 1.81141 0.51931 3.488 0.00262 **
## key_resp_10.rt 1.09765 0.60786 1.806 0.08771 .
## numberWords_recalled -0.09771 0.04340 -2.252 0.03708 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.401 on 18 degrees of freedom
## Multiple R-squared: 0.5308, Adjusted R-squared: 0.4265
## F-statistic: 5.091 on 4 and 18 DF, p-value: 0.006367
Effortful control significantly predicts the consistency of people applying optimal strategy throughout the task; higher EC, more use of optimal strategy. NA was not removed from the model, and itβs suggesting a negative relationship with the use of optimal CR strategy. Higher NA, less CR use. WMC is also significant, higher WMC, less CR strategy use.
# find first block that used optimal strategy then calculate accuracy for that block through the end of task
cols <- grep('block_', names(pot_df), value = TRUE) #use the df with total_CR_block == NA rows removed
n <- length(cols)
pot_df$per_afterOpti <- mapply(function(x, y) mean(unlist(pot_df[x, cols[y:n]])), seq(nrow(pot_df)), pot_df$first_CR_block)
m4 <- lm(per_afterOpti ~ EC_score+ key_resp_10.rt + numberWords_recalled + exp_isCorrect, pot_df)
step_m4 <- step(m4)
summary(step_m4)
##
## Call:
## lm(formula = per_afterOpti ~ EC_score + key_resp_10.rt + numberWords_recalled,
## data = pot_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.148345 -0.051042 0.006609 0.042259 0.121577
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.269830 0.173607 1.554 0.1410
## EC_score 0.057372 0.028243 2.031 0.0603 .
## key_resp_10.rt 0.058327 0.034602 1.686 0.1125
## numberWords_recalled 0.003619 0.002427 1.491 0.1567
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07283 on 15 degrees of freedom
## Multiple R-squared: 0.3641, Adjusted R-squared: 0.2369
## F-statistic: 2.863 on 3 and 15 DF, p-value: 0.07181
Effortful control is approaching significance, higher EC is associated with higher performance after discovering the optimal strategy, which means more consistently applying the optimal strategy.