The scripts contained in this markdown should in principle satisfy the following criteria:
PATH <- here("anonymized_All.csv")
raw_d <- read.csv(PATH)
Todo: automatic score conversion?
fd_response <- raw_d %>% filter(
trial_type == "free-description-response") %>%
filter((str_length(as.character(responses))) > 19) %>% # get rid of shorter response
select(responses, rt)
fd_stimulus <- raw_d %>% filter(
trial_type == "image-keyboard-response" & str_detect(stimulus, "free_description")
) %>% select(subject, stimulus)
fd_stimulus$trial_num <- sequence(rle(fd_stimulus$subject)$lengths) #add trial num for each participant
fd_table <- bind_cols(fd_stimulus, fd_response) %>%
mutate(task_name = "FD",
trial_RT = rt) %>%
rowwise() %>%
mutate(trial_raw =
unlist(fromJSON(as.character(responses)))) %>%
select(subject, task_name, stimulus, trial_num,trial_raw, rt)
fd_table %>% head() %>% kable()
subject | task_name | stimulus | trial_num | trial_raw | rt |
---|---|---|---|---|---|
1 | FD | images/free_description/01-bird.jpg | 1 | A bird standing on the rock | 6205.185 |
1 | FD | images/free_description/05-cow-black.jpg | 2 | A weird looking animal in front of the trees | 7610.06 |
1 | FD | images/free_description/04-car.jpg | 3 | I forgot ……………..lol | 6364.65 |
1 | FD | images/free_description/08-fish.jpg | 4 | A fish in front of something yellow | 15905.93 |
1 | FD | images/free_description/06-bike.jpg | 5 | hahahahahahahahahha | 4154.55 |
1 | FD | images/free_description/02-boat-castle.jpg | 6 | boat????lalalalala | 6584.525 |
Todo: score calculating
CA_table <- raw_d %>%
filter(trial_type == "attribution-Q1") %>%
rowwise() %>%
mutate(
task_name = "CA",
trial_RT = rt,
trial_raw = unlist(fromJSON(as.character(responses)))
) %>%
select(subject, task_name, trial_RT, trial_raw)
CA_table$trial_num <- sequence(rle(CA_table$subject)$lengths)
CA_table %>% head() %>% kable()
subject | task_name | trial_RT | trial_raw | trial_num |
---|---|---|---|---|
1 | CA | 13359.37 | Because she’s scared | 1 |
1 | CA | 8947.505 | Because it’s dangerous | 2 |
2 | CA | 19951.755 | because the bicycle is dangerous to play on | 1 |
2 | CA | 12144.275 | because the bicycle is dangerous to play on | 2 |
3 | CA | 50499.88 | both are | |
possible! | 1 | |||
3 | CA | 16859.85 | same as before | 2 |
Todo: calculate the key measurement messy data structure – individual and collected together
EB_table <- raw_d %>%
filter(trial_type == "ebbinghaus-keyboard") %>% separate(stimulus, into = c("path_a","path_b","stim_name"),sep = "/") %>%
separate(stim_name, into = c("stim_id", "trial_stim_left", "trial_stim_right"), sep = "_") %>% select(subject, key_press, rt, answer_correct, stim_id, trial_stim_left, trial_stim_right) %>%
mutate(
task_name = "EB",
block_type = case_when(
stim_id == "N" ~ "NC",
TRUE ~ "IL"
),
trial_stim_right = gsub(".jpg","", trial_stim_right),
trial_key_pressed = case_when(
key_press == 77 ~ "m",
TRUE ~ "z"
),
trial_correct = answer_correct,
temp_id = paste(subject, block_type),
trial_RT = rt
)
EB_table$trial_num <- sequence(rle(EB_table$temp_id)$lengths)
EB_table_summarize <- EB_table %>%
group_by(subject,block_type) %>%
summarise(
block_RT = mean(as.numeric(trial_RT)),
block_correct = mean(as.numeric(trial_correct))
) %>%
mutate(temp_id = paste(subject, block_type))
## `summarise()` regrouping output by 'subject' (override with `.groups` argument)
EB_table <- left_join(EB_table, EB_table_summarize, by = "temp_id") %>%
mutate(subject = subject.x,
block_type = block_type.x) %>%
select(subject, task_name, block_type, block_RT, block_correct, trial_num, trial_stim_left, trial_stim_right, trial_key_pressed, trial_RT, trial_correct)
EB_table %>% head() %>% kable(full_width = F)
subject | task_name | block_type | block_RT | block_correct | trial_num | trial_stim_left | trial_stim_right | trial_key_pressed | trial_RT | trial_correct |
---|---|---|---|---|---|---|---|---|---|---|
1 | EB | NC | 179.9 | 0.5 | 1 | 106 | 100 | m | 1050.05 | 0 |
1 | EB | NC | 179.9 | 0.5 | 2 | 100 | 110 | z | 388.265 | 0 |
1 | EB | NC | 179.9 | 0.5 | 3 | 98 | 100 | m | 759.36 | 1 |
1 | EB | NC | 179.9 | 0.5 | 4 | 100 | 86 | z | 390.715 | 1 |
1 | EB | NC | 179.9 | 0.5 | 5 | 100 | 118 | m | 557.58 | 1 |
1 | EB | NC | 179.9 | 0.5 | 6 | 100 | 102 | z | 298.895 | 0 |
Todo: not sure about the conversion system right now
RMTS_cond_table <- raw_d %>% filter(trial_type == "RMTStrain") %>%
select(subject, left_object, right_object, activate) %>%
mutate(
left_object = as.character(left_object),
right_object = as.character(right_object),
condition = case_when(
(left_object == right_object) && (activate == TRUE) ~ "same_actv",
(left_object != right_object) && (activate == TRUE) ~ "diff_actv",
(left_object == right_object) && (activate == FALSE) ~ "diff_actv",
(left_object != right_object) && (activate == FALSE) ~ "same_actv")
) %>%
group_by(subject) %>%
distinct(condition, .keep_all = TRUE) %>%
select(subject, condition)
RMTS_table <- raw_d %>% filter(trial_type == "RMTStest") %>%
select(subject, choice, sameSide, samePair, diffPair) %>%
left_join(RMTS_cond_table, by = "subject") %>%
mutate(
task_name = "RMTS",
choice = as.character(choice),
sameSide = as.character(sameSide),
trial_choice = case_when(
condition == "diff_actv" && choice == "rightTestButton" && sameSide == "R" ~ "obj",
condition == "diff_actv" && choice == "rightTestButton" && sameSide == "L" ~ "rel",
condition == "diff_actv" && choice == "leftTestButton" && sameSide == "R" ~ "rel",
condition == "diff_actv" && choice == "leftTestButton" && sameSide == "L" ~ "obj",
condition == "same_actv" && choice == "rightTestButton" && sameSide == "R" ~ "rel",
condition == "same_actv" && choice == "rightTestButton" && sameSide == "L" ~ "obj",
condition == "same_actv" && choice == "leftTestButton" && sameSide == "R" ~ "obj",
condition == "same_actv" && choice == "leftTestButton" && sameSide == "L" ~ "rel"
)
) %>%
select(subject, task_name, trial_choice)
RMTS_table$trial_num <- sequence(rle(RMTS_table$subject)$lengths)
RMTS_table %>% head() %>% kable()
subject | task_name | trial_choice | trial_num |
---|---|---|---|
1 | RMTS | obj | 1 |
1 | RMTS | obj | 2 |
1 | RMTS | obj | 3 |
1 | RMTS | obj | 4 |
2 | RMTS | obj | 1 |
2 | RMTS | obj | 2 |
height or proportion of height?
# build a dictionary with file name as key and size as value
sticker_dir = "preprocessing/material_stickers"
stickers = list.files(sticker_dir)
d <- dict()
for (s in stickers){
img <- readPNG(file.path(sticker_dir, s))
dim <- dim(img)
size <- dim[[1]] * dim[[2]]
src_name <- paste("images/sort/",s,sep = "")
d[src_name] <- size[[1]]
}
# get all the trials with sticker
sticker_d <- raw_d %>%filter(trial_type == "horizon-sticker")
#focus on the moved sticker for each participant
moved_sticker <- sticker_d %>%
select(subject, rt, trial_type, moves,final_locations) %>%
toJSON() %>%
fromJSON() %>%
mutate(
moves = as.character(moves),
moves = map(moves, ~ fromJSON(.) %>% as.data.frame())) %>%
unnest(moves) %>%
group_by(subject) %>%
distinct(src, .keep_all = TRUE)
# helper function to get each sticker's dimension from dictionary
get_sticker_dim <- function(src, d){
return (d[src][[1]])
}
# convert from src name to sticker dimension
moved_sticker$size <- unlist(map(moved_sticker$src,get_sticker_dim, d))
# caculate total sticker areas for each subject
sticker_area <- moved_sticker %>%
group_by(subject) %>%
summarize(
sum_area = sum(size)
)
## `summarise()` ungrouping output (override with `.groups` argument)
# function that map back to the dataframe
get_sticker_total_area <- function(id, caculated_df){
area <- caculated_df %>%
filter(subject == id) %>%
select(sum_area) %>%
pull()
return (area)
}
# add a column back to the raw dataframe
moved_sticker$stkr_area <- unlist(map(moved_sticker$subject,get_sticker_total_area, sticker_area))
# calculate final height
CANVAS_HEIGHT <-334.4
#focus on the moved sticker for each participant
final_sticker <- sticker_d %>%
select(subject, rt, trial_type, moves,final_locations) %>%
toJSON() %>%
fromJSON() %>%
mutate(
final_locations = as.character(final_locations),
final_locations = map(final_locations, ~ fromJSON(.) %>% as.data.frame())) %>%
unnest(final_locations) %>%
filter(src == "images/sort/horizon_2.png") %>%
mutate(
hz_height = CANVAS_HEIGHT - y
) %>%
select(subject, hz_height)
count_stkr <- moved_sticker %>%
group_by(subject) %>%
summarise(
stkr_count = n()
)
## `summarise()` ungrouping output (override with `.groups` argument)
stkr_df <- left_join(moved_sticker, final_sticker, by = "subject")
HZ_table <- left_join(stkr_df, count_stkr, by = "subject") %>%
mutate(task_name = "HZ") %>%
select(subject, task_name, rt, stkr_count, stkr_area, hz_height)
HZ_table %>% head() %>% kable()
subject | task_name | rt | stkr_count | stkr_area | hz_height |
---|---|---|---|---|---|
1 | HZ | 28978.42 | 4 | 321852 | 334.4 |
1 | HZ | 28978.42 | 4 | 321852 | 334.4 |
1 | HZ | 28978.42 | 4 | 321852 | 334.4 |
1 | HZ | 28978.42 | 4 | 321852 | 334.4 |
2 | HZ | 162270.145 | 31 | 1884274 | 242.4 |
2 | HZ | 162270.145 | 31 | 1884274 | 242.4 |
Need to assume that circle & label can correspond to each other i.e. same number label & circle in that order, or weird thing happens
circle_table <- raw_d %>%
filter(trial_type == "draw-circles") %>%
select(subject, locations, rt) %>%
toJSON() %>%
fromJSON() %>%
mutate(
circle_drawn = map(locations, ~ fromJSON(.) %>% as.data.frame())) %>%
unnest(circle_drawn) %>%
mutate(
circ_num = label,
circ_r = radius,
task_RT = rt,
task_name = "SymS",
temp_id = paste(subject,circ_num,sep="")
) %>%
select(subject, task_name, circ_num, circ_r, task_RT, task_name, temp_id)
label_table <- raw_d %>%
filter(trial_type == "circle-label") %>%
select(subject, responses) %>%
toJSON() %>%
fromJSON() %>%
mutate(
circ_label = map(responses, ~ fromJSON(.) %>% as.data.frame())) %>%
unnest(circ_label) %>%
mutate(
Q_Me = "Me"
) %>%
relocate(Q_Me,.before = Q0) %>%
pivot_longer(Q_Me:Q6, names_to = "label_names") %>%
filter(value != "") %>%
mutate(
circ_label = value
) %>%
select(subject, circ_label)
label_table$circ_id <- sequence(rle(label_table$subject)$lengths)
label_table <- label_table %>%
mutate(
temp_id = paste(subject, "circle", as.character(circ_id-1),sep = "")
)
SymS_table <- left_join(label_table, circle_table, by = "temp_id") %>%
mutate(
subject = subject.x
) %>%
select(subject, task_name, circ_num, circ_label, circ_r, task_RT)
# calculate the average non-self circle
average_other_circle <- SymS_table %>%
filter(circ_num != "circle0") %>%
group_by(subject) %>%
summarise(
mean = mean(circ_r*2)
)
## `summarise()` ungrouping output (override with `.groups` argument)
calculate_inflation_score <- function(id, other_df, summarize_df){
other_average <- other_df %>%
filter(subject == id) %>%
select(mean) %>%
pull()
self_circle <- summarize_df %>%
filter(subject == id) %>%
filter(circ_num == "circle0") %>%
select(circ_r) %>%
pull()
return (self_circle * 2 - other_average)
}
SymS_table$task_score <- unlist(map(SymS_table$subject,calculate_inflation_score, average_other_circle, SymS_table))
SymS_table %>% head() %>% kable()
subject | task_name | circ_num | circ_label | circ_r | task_RT | task_score |
---|---|---|---|---|---|---|
1 | SymS | circle0 | Me | 124.65151 | 10051.6 | 14.540532 |
1 | SymS | circle1 | mom | 107.20075 | 10051.6 | 14.540532 |
1 | SymS | circle2 | dad | 127.56175 | 10051.6 | 14.540532 |
2 | SymS | circle0 | Me | 100.60318 | 18732.985 | 3.633101 |
2 | SymS | circle1 | mom | 99.62429 | 18732.985 | 3.633101 |
2 | SymS | circle2 | dad | 97.94897 | 18732.985 | 3.633101 |
PC_table <- raw_d %>% filter(
grepl("pen_choice", stimulus)
) %>%
select(subject, button_pressed, unique_color, unique_position, rt) %>%
mutate(
task_name = "PC",
button_pressed = as.numeric(button_pressed),
unique_choice = case_when(
button_pressed == unique_position ~ "yes",
TRUE ~ "no"
)
)
PC_table %>% head() %>% kable()
subject | button_pressed | unique_color | unique_position | rt | task_name | unique_choice |
---|---|---|---|---|---|---|
1 | 4 | yellow | 3 | 3457 | PC | no |
2 | 4 | yellow | 2 | 2575 | PC | no |
3 | 3 | green | 1 | 3615 | PC | no |
4 | 4 | green | 3 | 2295 | PC | no |