First, load libraries!
# Load libraries
library(lme4)
## Loading required package: Matrix
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(emmeans)
library(effectsize)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.0.10 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.1.8
## ✔ lubridate 1.9.2 ✔ tidyr 1.2.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
Now, initialize data frames!
# Initialize data frames
# Plain `<-` is used because these are top-level objects; the super-assignment
# operator `<<-` is not needed here.
# The "." is escaped so that only names really ending in ".csv" are listed
# (an unescaped "." is a regex wildcard that matches any character).
allFiles <- list.files(pattern = "\\.csv$")
# Empty accumulators that the extraction loop appends to, one file at a time
all_Real_Data <- data.frame()
all_Clicked <- data.frame()
all_Correct_Clicked <- data.frame()
We need to remove these columns because they are not logged consistently. If the experimenter forgets to press “q” at the end of the experiment and instead presses “esc” (I did this a few times), the experiment still ends normally BUT these columns are not written to the csv. Some csv’s then have these columns and others don’t, which causes a problem when the files are combined. So we just remove them. And these are definitely pointless columns!
# Columns that are present in only some of the csv files; they are dropped from
# every file when it is read so that all files end up with the same columns.
# Plain `<-` is used because this is a top-level object (no need for `<<-`).
columns_to_remove <- c(
  "goRightMouse.clicked_name", "endExp.keys", "endExp.rt",
  "endExp.started", "endExp.stopped", "expFeedbackText.started",
  "expFeedbackText.stopped"
)
This is one of the main parts of the preprocessing stage. Pretty much every step is commented. TL;DR: it extracts the relevant columns from the csv files and cleans up the names a bit for readability.
# Extract data files
#
# For every csv in allFiles this loop:
#   1. reads the file and drops the inconsistently logged columns,
#   2. keeps only the real (non-practice) trial rows and the relevant columns,
#   3. expands the logged click lists into one row per click, and
#   4. appends the results to all_Real_Data, all_Clicked and
#      all_Correct_Clicked.

# Helper: turn a logged list string such as "['a.png', 'b.png']" into a
# character vector by stripping brackets, spaces and single quotes and then
# splitting on commas. Note that an empty list ("[]") yields one "" element.
parse_logged_names <- function(x) {
  x %>%
    gsub("\\[|\\]", "", .) %>%
    gsub(" ", "", .) %>%
    gsub("\'", "", .) %>%
    str_split(",") %>%
    unlist()
}

# Helper: turn a logged list string of numbers such as "[1.2, 3.4]" into a
# numeric vector. An empty list ("[]") yields a single NA.
parse_logged_times <- function(x) {
  x %>%
    gsub("\\[|\\]", "", .) %>%
    str_split(",") %>%
    unlist() %>%
    as.numeric()
}

# seq_along() (rather than 1:length()) so that nothing runs when no csv file
# was found
for (d in seq_along(allFiles)) {
  all_Data <- read.csv(allFiles[d], header = TRUE, fileEncoding = "UTF-8-BOM")
  all_Data <- all_Data[, !(colnames(all_Data) %in% columns_to_remove),
                       drop = FALSE]
  # Remove .csv extension for readability (the dot is escaped so that it only
  # matches a literal ".")
  all_Data$condsFile <- all_Data$condsFile %>% str_replace("\\.csv", "")
  real_Data <- all_Data %>%
    # Filter out the practice trials
    filter(realOrNot == 1) %>%
    # Filter out the introduction and instruction screens
    filter(condsFile != "mixed") %>%
    filter(trialFeedBackPress.keys != "q") %>%
    # Select the relevant columns
    select(
      "participant", "condsFile", "delayDuration", "order",
      "multipleTrials.thisN", "oneTrial.thisN", "templateTime",
      "templateFrame", "nCorrect", "nIncorrect",
      "clickedImages", "clickedImagesTime",
      "correctClickedsName", "correctClickedsTime",
      "correctImages", "curCorrectImages",
      "workingTime", "workingFrame", "closing"
    ) %>%
    # Rename for readability
    rename(
      trial = multipleTrials.thisN, trip = oneTrial.thisN,
      listTime = templateTime, listFrame = templateFrame,
      shelfTime = workingTime, shelfFrame = workingFrame,
      remainCorAfterTrip = curCorrectImages,
      numCorrect = nCorrect,
      numIncorrect = nIncorrect
    )
  # A file without any usable row would break the per-row loop below, so skip
  # it (with a warning so that it does not go unnoticed)
  if (nrow(real_Data) == 0) {
    warning("No real trials found in ", allFiles[d], "; file skipped.")
    next
  }
  # Increment trip and trial index by 1 because the starting point is 0
  real_Data$trial <- real_Data$trial + 1
  real_Data$trip <- real_Data$trip + 1
  # Initialize columns containing the number of correct/incorrect clicks for
  # the current trip; they are filled in by the per-row loop below
  real_Data[, c("nCurCorrect", "nCurIncorrect", "nCurClickeds")] <- NA
  real_Data$nRemainCorrect <- NA
  # Per-row results are collected in pre-allocated lists and bound once after
  # the loop (calling rbind() inside the loop copies the data on every pass)
  n_rows <- nrow(real_Data)
  clicked_rows <- vector("list", n_rows)
  correct_clicked_rows <- vector("list", n_rows)
  for (n in seq_len(n_rows)) {
    # "current_Info" holds the trip-level information of the current row; it
    # is repeated for every click of the trip when combined with the clicks
    current_Info <- real_Data[n, c(
      "participant", "condsFile", "delayDuration", "order",
      "trial", "trip", "listTime", "listFrame", "numCorrect",
      "numIncorrect", "shelfTime", "shelfFrame",
      "correctImages"
    )]
    # Count how many correct selections are left after this trip. An empty
    # logged list parses to a single "" element, so only non-empty entries are
    # counted; otherwise an empty list would be counted as one remaining item.
    remaining_names <- parse_logged_names(real_Data$remainCorAfterTrip[n])
    curCorRemain <- sum(!is.na(remaining_names) & remaining_names != "")
    # First, we look at all of the attempts: names and times of every click
    curClickedName <- list(
      clickName = parse_logged_names(real_Data$clickedImages[n])
    )
    curClickedTime <- list(
      clickTime = parse_logged_times(real_Data$clickedImagesTime[n])
    )
    # Combine the info with the names of what was clicked and the time
    curClicked <- cbind(current_Info, curClickedName, row.names = NULL) %>%
      cbind(curClickedTime, row.names = NULL)
    # Now the same for the correct attempts only
    curCorClickedName <- list(
      corClickName = parse_logged_names(real_Data$correctClickedsName[n])
    )
    curCorClickedTime <- list(
      corClickTime = parse_logged_times(real_Data$correctClickedsTime[n])
    )
    # Combine the info with the names and the times of what was clicked
    curCorClicked <- cbind(current_Info, curCorClickedName,
                           row.names = NULL) %>%
      cbind(curCorClickedTime, row.names = NULL)
    # nCurCorrect is the count of what was correctly clicked and nCurClickeds
    # the count of what was attempted; na.omit() drops the NA that an empty
    # click list produces
    n_correct_clicks <- length(na.omit(curCorClickedTime$corClickTime))
    n_clicks <- length(na.omit(curClickedTime$clickTime))
    real_Data[n, c("nCurCorrect", "nCurClickeds")] <-
      c(n_correct_clicks, n_clicks)
    # nRemainCorrect = correct items still left after this trip + correct
    # clicks made during this trip (presumably the number of correct items
    # that were left at the start of the trip -- TODO confirm)
    real_Data[n, "nRemainCorrect"] <- curCorRemain + n_correct_clicks
    # Information about what a participant clicked goes in clicked_rows, and
    # info about what they clicked correctly goes in correct_clicked_rows
    clicked_rows[[n]] <- curClicked
    correct_clicked_rows[[n]] <- curCorClicked
  }
  curParClicked <- do.call(rbind, clicked_rows)
  curParCorrectClicked <- do.call(rbind, correct_clicked_rows)
  # Set incorrect clicks to be the number of clicks minus the number of correct
  # clicks
  real_Data$nCurIncorrect <- real_Data$nCurClickeds - real_Data$nCurCorrect
  # Add the current iteration's data to the mother dataset, all_Real_Data
  all_Real_Data <- rbind(all_Real_Data, real_Data)
  all_Clicked <- rbind(all_Clicked, curParClicked)
  all_Correct_Clicked <- rbind(all_Correct_Clicked, curParCorrectClicked)
}
Next are some managerial bits where we initialize the data frames that will store the info about performance.
# Unique participant identifiers and unique condition names found in the
# click data
participants <- unique(all_Clicked[["participant"]])
cond <- unique(all_Clicked[["condsFile"]])
# Empty data frame that will collect the click information of every trip
newAllclicked <- data.frame()
# Add the (initially empty) performance columns to the data frame that holds
# all of the clicked data
all_Clicked[, c(
  "clickAccuracy",
  "remainCorImage",
  "nRemainCor",
  "remainOptions"
)] <- NA
Here is another big for-loop, where we get the items the participant clicked on in each trip, and which were correct.
# This for-loop iterates over each participant, condition, trial and trip and
# scores every click: whether it was correct, and which / how many correct
# images and clickable options were left when the click was made.
#
# seq_along()/seq_len() are used instead of 1:length()/1:nrow() so that empty
# inputs are skipped, and the scored trips are collected in a list and bound
# once at the end instead of calling rbind() inside the innermost loop.
scored_trips <- list()
for (p in seq_along(participants)) {
  # In the for-loop, we are going to look at one participant at a time
  curParClicked <- all_Clicked %>% filter(participant == participants[p])
  # And we are going to look at one condition at a time; the conditions are
  # taken from this participant's own data
  cond <- unique(curParClicked$condsFile)
  for (ci in seq_along(cond)) {
    curCond <- curParClicked %>% filter(condsFile == cond[ci])
    # Set trials to be the unique values in the "trial" column for this
    # condition
    trials <- unique(curCond$trial)
    # Now we are going to look at one trial at a time
    for (ti in seq_along(trials)) {
      curTrial <- curCond %>% filter(trial == trials[ti])
      # Parse the first value in the correctImages column into a vector of
      # image names. This vector is carried across the trips of the trial and
      # shrinks as correct images get clicked.
      curCorrectIm <- curTrial$correctImages[1] %>%
        gsub("\\[|\\]", "", .) %>%
        gsub(" ", "", .) %>%
        gsub("\'", "", .) %>%
        str_split(",") %>%
        unlist()
      # Set trips to be the unique values in the "trip" column for this trial
      trips <- unique(curTrial$trip)
      # Now we are going to look at one trip at a time (within the trial,
      # within the condition, within the participant)
      for (tr in seq_along(trips)) {
        curTrip <- curTrial %>% filter(trip == trips[tr])
        # The remaining options for any given trip, at the beginning, is
        # always 10 (10 pairs of 2-AFC)
        nRemainOption <- 10
        # Walk through the clicks of the trip in order; the order matters
        # because every click changes what is left for the next one
        for (q in seq_len(nrow(curTrip))) {
          # Correct images still available before this click, as one
          # comma-separated string
          curTrip$remainCorImage[q] <- vapply(
            list(curCorrectIm), paste, collapse = ",", character(1L)
          )
          # nRemainCor is the count of how many correct options are left
          curTrip$nRemainCor[q] <- length(curCorrectIm)
          # remainOptions is how many clickable images are left
          curTrip$remainOptions[q] <- nRemainOption
          # clickAccuracy is whether the clicked image is correct (1) or
          # not (0)
          curTrip$clickAccuracy[q] <-
            as.numeric(curTrip$clickName[q] %in% curCorrectIm)
          # One option fewer is left after a real click; a row whose clickTime
          # is NA does not reduce the count
          nRemainOption <- nRemainOption -
            length(na.omit(curTrip$clickTime[q]))
          # And the correct options that are left are the correct options not
          # yet clicked on
          curCorrectIm <- curCorrectIm[!curCorrectIm %in% curTrip$clickName[q]]
        }
        # Keep the scored trip; all trips are bound together after the loops
        scored_trips[[length(scored_trips) + 1L]] <- curTrip
      }
    }
  }
}
# newAllclicked holds the scored clicks of every trip
newAllclicked <- rbind(newAllclicked, do.call(rbind, scored_trips))
Now we are going to initialize another data frame. This one will store everything the participant clicked on before they clicked on something incorrect… so, it’s the streak correct!
# Empty data frame that will collect, for every trip, the clicks made before
# the first incorrect click (the "streak correct" rows)
allClickedBe4Incor <- data.frame()
This is the for-loop that will help us calculate streak correct. The comment towards the end about removing duplicates is because participants 1 and 2 are missing the no_cost condition, so if we don’t remove duplicates, those participants will have duplicate rows for their first blocks because the code wants everyone to end up with the same number of rows, if that makes sense.
# For every trip, keep only the clicks that belong to the participant's streak
# of correct clicks, i.e. the clicks made before the first incorrect one.
#
# seq_along() replaces 1:length() so that empty inputs are skipped, the kept
# rows are collected in a list and bound once after the loops, and distinct()
# is applied once at the end instead of once per participant.
streak_rows <- list()
for (p in seq_along(participants)) {
  # In the for-loop, we are going to look at one participant at a time, one
  # condition at a time, one trial and then one trip at a time
  curParClicked <- newAllclicked %>% filter(participant == participants[p])
  # Conditions are taken from this participant's own data
  cond <- unique(curParClicked$condsFile)
  for (ci in seq_along(cond)) {
    curCond <- curParClicked %>% filter(condsFile == cond[ci])
    trials <- unique(curCond$trial)
    for (ti in seq_along(trials)) {
      curTrial <- curCond %>% filter(trial == trials[ti])
      trips <- unique(curTrial$trip)
      # Now we will go within the trip
      for (tr in seq_along(trips)) {
        curTrip <- curTrial %>% filter(trip == trips[tr])
        # correctIndex is the row of the first click whose accuracy is zero,
        # i.e. the first incorrect click of the trip (NA if there is none)
        correctIndex <- which(curTrip$clickAccuracy == 0)[1]
        if (is.na(correctIndex)) {
          # No incorrect click in this trip: the streak is never broken, so
          # every row of the trip is kept
          validTripClick <- curTrip
        } else if (correctIndex == 1) {
          # The very first row is already incorrect: keep that single row so
          # that the trip is still represented (its accuracy of 0 adds nothing
          # to the streak sum)
          validTripClick <- curTrip[1, ]
        } else {
          # Keep the rows from the first one up until (but not including) the
          # first incorrect response. seq_len() states this explicitly; the
          # previous `1:correctIndex-1` was parsed as `(1:correctIndex) - 1`
          # and only worked because the resulting 0 index is silently dropped.
          validTripClick <- curTrip[seq_len(correctIndex - 1), ]
        }
        streak_rows[[length(streak_rows) + 1L]] <- validTripClick
      }
    }
  }
}
# Combine it into a dataframe and remove exact duplicate rows (duplicates were
# previously produced for Ss 1 and 2 because of a condsFile mismatch)
allClickedBe4Incor <- rbind(allClickedBe4Incor, do.call(rbind, streak_rows)) %>%
  distinct()
This part just sums up the streak corrects for us.
# Get the sum of the streak correct for each trip: one row per
# participant/condition/trial/trip with the number of correct clicks that were
# made before the first incorrect click.
# TRUE is spelled out (T can be reassigned), and `.groups = "drop"` returns an
# ungrouped result directly, which also silences the summarise() message.
allClickedBe4IncorSum <- allClickedBe4Incor %>%
  group_by(participant, condsFile, trial, trip) %>%
  summarize(
    nCurCorBe4Inc = sum(clickAccuracy, na.rm = TRUE),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'participant', 'condsFile', 'trial'. You
## can override using the `.groups` argument.
This part combines streak correct data with the OG data.
# Combine with the mother dataset
# The join keys are named explicitly so that the merge does not depend on
# whichever column names the two data frames happen to share (for example when
# this step is re-run on an already merged all_Real_Data).
all_Real_Data <- merge(
  all_Real_Data,
  allClickedBe4IncorSum,
  by = c("participant", "condsFile", "trial", "trip")
) %>%
  # Put the streak count right next to the count of correct clicks
  relocate(nCurCorBe4Inc, .after = nCurCorrect)
Yay!
# Calculate Memory Usage
# MU = (((numCorrect/numAttempted)-0.5)/0.5)*numAttempted
# For a trip without clicks (nCurClickeds == 0) this evaluates to 0/0 = NaN.
# The data are ungrouped again afterwards so that all_Real_Data does not
# silently stay grouped for later steps.
all_Real_Data <- all_Real_Data %>%
  group_by(participant, condsFile, trial, trip) %>%
  mutate(MU = (((nCurCorrect / nCurClickeds) - 0.5) / 0.5) * nCurClickeds) %>%
  ungroup()
When participants don’t click anything, the above calculation sets MU to NaN, so we make sure it’s actually 0 (because they used zero memory…)
# MU is NaN for trips in which the participant did not make any clicks; set
# those values to zero (no clicks means zero memory was used)
all_Real_Data$MU <- replace(all_Real_Data$MU, is.nan(all_Real_Data$MU), 0)
Now we make sure that every trial has a first trip. We want the below code to return an empty dataset, and it should.
# Make sure there is a first trip for every trial: collect every
# participant/condition/trial combination that has no row with trip == 1
unique_combinations_no_trip_1 <- all_Real_Data %>%
  group_by(participant, condsFile, trial) %>%
  summarise(has_trip_1 = any(trip == 1), .groups = "drop") %>%
  filter(!has_trip_1) %>%
  select(participant, condsFile, trial)
# This should be an empty dataset. The result is checked explicitly so that a
# missing first trip cannot go unnoticed.
if (nrow(unique_combinations_no_trip_1) > 0) {
  warning(
    nrow(unique_combinations_no_trip_1),
    " trial(s) have no first trip; see unique_combinations_no_trip_1."
  )
}
# Summarize all trips of every trial: one row per
# participant/condition/order/trial with the number of trips, list and shelf
# times, click counts and mean memory usage.
# TRUE is spelled out (T can be reassigned). `.groups = "drop_last"` is the
# grouping summarise() applied by default here (the result stays grouped by
# participant, condsFile and order); stating it silences the message.
Summarized_data <- all_Real_Data %>%
  group_by(participant, condsFile, order, trial) %>%
  summarise(
    nTrip = n(),
    totalListTime = sum(listTime, na.rm = TRUE),
    meanListTime = mean(listTime, na.rm = TRUE),
    totalShelfTime = sum(shelfTime, na.rm = TRUE),
    meanShelfTime = mean(shelfTime, na.rm = TRUE),
    totalTime = totalListTime + totalShelfTime,
    totalCorrect = sum(nCurCorrect),
    meanCorrect = totalCorrect / nTrip,
    meanCorBefInc = sum(nCurCorBe4Inc) / nTrip,
    totalInCorrect = sum(nCurIncorrect),
    meanIncorrect = totalInCorrect / nTrip,
    maxCorrect = max(nCurCorrect),
    maxIncorrect = max(nCurIncorrect),
    totalClickeds = sum(nCurClickeds),
    meanclickeds = totalClickeds / nTrip,
    meanMU = mean(MU, na.rm = TRUE),
    .groups = "drop_last"
  )
## `summarise()` has grouped output by 'participant', 'condsFile', 'order'. You
## can override using the `.groups` argument.
And again for first trips only…
# Do the same but for first trips only: keep the rows with trip == 1 and
# compute the same per-trial summary on them.
first_trip_only <- all_Real_Data %>%
  filter(trip == 1)
# TRUE is spelled out (T can be reassigned). `.groups = "drop_last"` is the
# grouping summarise() applied by default here (the result stays grouped by
# participant, condsFile and order); stating it silences the message.
Summarized_first_trips <- first_trip_only %>%
  group_by(participant, condsFile, order, trial) %>%
  summarise(
    nTrip = n(),
    totalListTime = sum(listTime, na.rm = TRUE),
    meanListTime = mean(listTime, na.rm = TRUE),
    totalShelfTime = sum(shelfTime, na.rm = TRUE),
    meanShelfTime = mean(shelfTime, na.rm = TRUE),
    totalTime = totalListTime + totalShelfTime,
    totalCorrect = sum(nCurCorrect),
    meanCorrect = totalCorrect / nTrip,
    meanCorBefInc = sum(nCurCorBe4Inc) / nTrip,
    totalInCorrect = sum(nCurIncorrect),
    meanIncorrect = totalInCorrect / nTrip,
    maxCorrect = max(nCurCorrect),
    maxIncorrect = max(nCurIncorrect),
    totalClickeds = sum(nCurClickeds),
    meanclickeds = totalClickeds / nTrip,
    meanMU = mean(MU, na.rm = TRUE),
    .groups = "drop_last"
  )
## `summarise()` has grouped output by 'participant', 'condsFile', 'order'. You
## can override using the `.groups` argument.
Save as csv!
#write.csv(Summarized_data,"Summarized_data_EM6_FALLpilot.csv",row.names = FALSE)
#write.csv(Summarized_first_trips,"Summarized_first_trips_EM6_FALLpilot.csv",row.names = FALSE)