INTRO and Todo

The scripts contained in this markdown should in principle satisfy the following criteria:

PATH <- here("anonymized_All.csv")
raw_d <- read.csv(PATH)

FD table : Free Description Table

Todo: automatic score conversion?

fd_response <- raw_d %>% filter(
  trial_type == "free-description-response") %>% 
  filter((str_length(as.character(responses))) > 19) %>% # get rid of shorter response
  select(responses, rt) 



fd_stimulus <- raw_d %>% filter(
  trial_type == "image-keyboard-response" & str_detect(stimulus, "free_description") 
) %>% select(subject, stimulus) 

fd_stimulus$trial_num <- sequence(rle(fd_stimulus$subject)$lengths) #add trial num for each participant

fd_table <- bind_cols(fd_stimulus, fd_response) %>% 
            mutate(task_name = "FD", 
                   trial_RT = rt) %>%  
            rowwise() %>% 
            mutate(trial_raw = 
                  unlist(fromJSON(as.character(responses)))) %>% 
  select(subject, task_name, stimulus, trial_num,trial_raw, rt)

fd_table %>% head() %>%  kable()
subject task_name stimulus trial_num trial_raw rt
1 FD images/free_description/01-bird.jpg 1 A bird standing on the rock 6205.185
1 FD images/free_description/05-cow-black.jpg 2 A weird looking animal in front of the trees 7610.06
1 FD images/free_description/04-car.jpg 3 I forgot ……………..lol 6364.65
1 FD images/free_description/08-fish.jpg 4 A fish in front of something yellow 15905.93
1 FD images/free_description/06-bike.jpg 5 hahahahahahahahahha 4154.55
1 FD images/free_description/02-boat-castle.jpg 6 boat????lalalalala 6584.525

CA table - Causal Attribtution Table

Todo: score calculating

CA_table <- raw_d %>% 
  filter(trial_type == "attribution-Q1") %>% 
  rowwise() %>% 
  mutate(
    task_name = "CA",
    trial_RT = rt, 
    trial_raw = unlist(fromJSON(as.character(responses)))
  ) %>% 
  select(subject, task_name, trial_RT, trial_raw)
  
CA_table$trial_num <- sequence(rle(CA_table$subject)$lengths)
CA_table %>% head() %>%  kable()
subject task_name trial_RT trial_raw trial_num
1 CA 13359.37 Because she’s scared 1
1 CA 8947.505 Because it’s dangerous 2
2 CA 19951.755 because the bicycle is dangerous to play on 1
2 CA 12144.275 because the bicycle is dangerous to play on 2
3 CA 50499.88 both are
possible! 1
3 CA 16859.85 same as before 2

EB table - Ebbinghaus table

Todo: calculate the key measurement messy data structure – individual and collected together

EB_table <- raw_d %>%
  filter(trial_type == "ebbinghaus-keyboard") %>% separate(stimulus, into = c("path_a","path_b","stim_name"),sep = "/") %>%
  separate(stim_name, into = c("stim_id", "trial_stim_left", "trial_stim_right"), sep = "_") %>% select(subject, key_press, rt, answer_correct, stim_id, trial_stim_left, trial_stim_right) %>% 
  mutate(
    task_name = "EB",
    block_type = case_when(
      stim_id == "N" ~ "NC", 
      TRUE ~ "IL"
    ), 
    trial_stim_right = gsub(".jpg","", trial_stim_right), 
    trial_key_pressed = case_when(
      key_press == 77 ~ "m", 
      TRUE ~ "z"
    ), 
    trial_correct = answer_correct,
    temp_id = paste(subject, block_type), 
    trial_RT = rt
  )

EB_table$trial_num <- sequence(rle(EB_table$temp_id)$lengths)
EB_table_summarize <- EB_table %>% 
  group_by(subject,block_type) %>% 
  summarise(
    block_RT = mean(as.numeric(trial_RT)),
    block_correct = mean(as.numeric(trial_correct))
  ) %>% 
  mutate(temp_id = paste(subject, block_type))
## `summarise()` regrouping output by 'subject' (override with `.groups` argument)
EB_table <- left_join(EB_table, EB_table_summarize, by = "temp_id") %>% 
  mutate(subject = subject.x,
         block_type = block_type.x) %>% 
  select(subject, task_name, block_type, block_RT, block_correct, trial_num, trial_stim_left, trial_stim_right, trial_key_pressed, trial_RT, trial_correct)

EB_table %>% head() %>%  kable(full_width = F)
subject task_name block_type block_RT block_correct trial_num trial_stim_left trial_stim_right trial_key_pressed trial_RT trial_correct
1 EB NC 179.9 0.5 1 106 100 m 1050.05 0
1 EB NC 179.9 0.5 2 100 110 z 388.265 0
1 EB NC 179.9 0.5 3 98 100 m 759.36 1
1 EB NC 179.9 0.5 4 100 86 z 390.715 1
1 EB NC 179.9 0.5 5 100 118 m 557.58 1
1 EB NC 179.9 0.5 6 100 102 z 298.895 0

RMTS table

Todo: not sure about the conversion system right now

RMTS_cond_table <- raw_d %>% filter(trial_type == "RMTStrain") %>% 
  select(subject, left_object, right_object, activate) %>% 
  mutate(
    left_object = as.character(left_object), 
    right_object = as.character(right_object),
    condition = case_when(
      (left_object == right_object) && (activate == TRUE) ~ "same_actv",
      (left_object != right_object) && (activate == TRUE) ~ "diff_actv",
      (left_object == right_object) && (activate == FALSE) ~ "diff_actv", 
      (left_object != right_object) && (activate == FALSE) ~ "same_actv")
    ) %>% 
  group_by(subject) %>% 
  distinct(condition, .keep_all = TRUE) %>% 
  select(subject, condition)

RMTS_table <- raw_d %>% filter(trial_type == "RMTStest") %>% 
  select(subject, choice, sameSide, samePair, diffPair) %>% 
  left_join(RMTS_cond_table, by = "subject") %>% 
  mutate(
    task_name = "RMTS", 
    choice = as.character(choice), 
    sameSide = as.character(sameSide), 
    trial_choice = case_when(
      condition == "diff_actv" && choice == "rightTestButton" && sameSide == "R" ~ "obj", 
      condition == "diff_actv" && choice == "rightTestButton" && sameSide == "L" ~ "rel", 
      condition == "diff_actv" && choice == "leftTestButton" && sameSide == "R" ~ "rel", 
      condition == "diff_actv" && choice == "leftTestButton" && sameSide == "L" ~ "obj", 
      condition == "same_actv" && choice == "rightTestButton" && sameSide == "R" ~ "rel", 
      condition == "same_actv" && choice == "rightTestButton" && sameSide == "L" ~ "obj", 
      condition == "same_actv" && choice == "leftTestButton" && sameSide == "R" ~ "obj", 
      condition == "same_actv" && choice == "leftTestButton" && sameSide == "L" ~ "rel"
    )
  ) %>% 
  select(subject, task_name, trial_choice)

RMTS_table$trial_num <- sequence(rle(RMTS_table$subject)$lengths)

RMTS_table %>% head() %>% kable()
subject task_name trial_choice trial_num
1 RMTS obj 1
1 RMTS obj 2
1 RMTS obj 3
1 RMTS obj 4
2 RMTS obj 1
2 RMTS obj 2

HZ table - Horizon sticker table

height or proportion of height?

# build a dictionary with file name as key and size as value 
sticker_dir = "preprocessing/material_stickers"
stickers = list.files(sticker_dir)

d <- dict()
for (s in stickers){
  img <- readPNG(file.path(sticker_dir, s)) 
  dim <- dim(img)
  size <- dim[[1]] * dim[[2]]
  src_name <- paste("images/sort/",s,sep = "")
  d[src_name] <- size[[1]]
}

# get all the trials with sticker 
sticker_d <- raw_d %>%filter(trial_type == "horizon-sticker")

#focus on the moved sticker for each participant 
moved_sticker <- sticker_d %>% 
  select(subject, rt, trial_type, moves,final_locations) %>% 
  toJSON() %>% 
  fromJSON() %>% 
  mutate(
    moves = as.character(moves),
    moves = map(moves, ~ fromJSON(.) %>% as.data.frame())) %>% 
  unnest(moves) %>% 
  group_by(subject) %>% 
  distinct(src, .keep_all = TRUE)



# helper function to get each sticker's dimension from dictionary
get_sticker_dim <- function(src, d){
  return (d[src][[1]])
}

# convert from src name to sticker dimension 
moved_sticker$size <- unlist(map(moved_sticker$src,get_sticker_dim, d))

# caculate total sticker areas for each subject 
sticker_area <- moved_sticker %>% 
  group_by(subject) %>% 
  summarize(
    sum_area = sum(size)
  ) 
## `summarise()` ungrouping output (override with `.groups` argument)
# function that map back to the dataframe
get_sticker_total_area <- function(id, caculated_df){
  area <- caculated_df %>% 
    filter(subject == id) %>% 
    select(sum_area) %>% 
    pull()
  return (area)
}

# add a column back to the raw dataframe 
moved_sticker$stkr_area <- unlist(map(moved_sticker$subject,get_sticker_total_area, sticker_area))

# calculate final height  
CANVAS_HEIGHT <-334.4

#focus on the moved sticker for each participant 
final_sticker <- sticker_d %>% 
  select(subject, rt, trial_type, moves,final_locations) %>% 
  toJSON() %>% 
  fromJSON() %>% 
  mutate(
    final_locations = as.character(final_locations),
    final_locations = map(final_locations, ~ fromJSON(.) %>% as.data.frame())) %>% 
  unnest(final_locations) %>% 
  filter(src == "images/sort/horizon_2.png") %>% 
  mutate(
    hz_height = CANVAS_HEIGHT - y
  ) %>% 
  select(subject, hz_height)

count_stkr <- moved_sticker %>% 
  group_by(subject) %>% 
  summarise(
    stkr_count = n()
  )
## `summarise()` ungrouping output (override with `.groups` argument)
stkr_df <- left_join(moved_sticker, final_sticker, by = "subject")
 
HZ_table <- left_join(stkr_df, count_stkr, by = "subject") %>% 
  mutate(task_name = "HZ") %>% 
  select(subject, task_name, rt, stkr_count, stkr_area, hz_height)

HZ_table %>% head() %>% kable()
subject task_name rt stkr_count stkr_area hz_height
1 HZ 28978.42 4 321852 334.4
1 HZ 28978.42 4 321852 334.4
1 HZ 28978.42 4 321852 334.4
1 HZ 28978.42 4 321852 334.4
2 HZ 162270.145 31 1884274 242.4
2 HZ 162270.145 31 1884274 242.4

Syms Table - Symbolic Self-Inflation Table

Need to assume that circle & label can correspond to each other i.e. same number label & circle in that order, or weird thing happens

circle_table <- raw_d %>% 
  filter(trial_type == "draw-circles") %>% 
  select(subject, locations, rt) %>% 
  toJSON() %>% 
  fromJSON() %>% 
  mutate(
    circle_drawn = map(locations, ~ fromJSON(.) %>% as.data.frame())) %>% 
  unnest(circle_drawn) %>% 
  mutate(
    circ_num = label, 
    circ_r = radius, 
    task_RT = rt, 
    task_name = "SymS",
    temp_id = paste(subject,circ_num,sep="")
  ) %>% 
  select(subject, task_name, circ_num, circ_r, task_RT, task_name, temp_id)

label_table <- raw_d %>% 
  filter(trial_type == "circle-label") %>% 
  select(subject, responses) %>%
  toJSON() %>% 
  fromJSON() %>%
  mutate(
    circ_label = map(responses, ~ fromJSON(.) %>% as.data.frame())) %>% 
  unnest(circ_label) %>% 
  mutate(
    Q_Me = "Me"
    ) %>% 
  relocate(Q_Me,.before = Q0) %>% 
  pivot_longer(Q_Me:Q6, names_to = "label_names") %>% 
  filter(value != "") %>% 
  mutate(
    circ_label = value
    
  ) %>% 
  select(subject, circ_label)

label_table$circ_id <- sequence(rle(label_table$subject)$lengths)
label_table <- label_table %>% 
  mutate(
    temp_id = paste(subject, "circle", as.character(circ_id-1),sep = "")
  )


SymS_table <- left_join(label_table, circle_table, by = "temp_id") %>% 
  mutate(
    subject = subject.x
  ) %>% 
  select(subject, task_name, circ_num, circ_label, circ_r, task_RT)

# calculate the average non-self circle 
average_other_circle <- SymS_table %>% 
  filter(circ_num != "circle0") %>% 
  group_by(subject) %>% 
  summarise(
    mean = mean(circ_r*2)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
calculate_inflation_score <- function(id, other_df, summarize_df){
  other_average <- other_df %>% 
    filter(subject == id) %>% 
    select(mean) %>% 
    pull()
  
  self_circle <- summarize_df %>% 
    filter(subject == id) %>% 
    filter(circ_num == "circle0") %>% 
    select(circ_r) %>% 
    pull() 
  
  return (self_circle * 2 - other_average)
}


SymS_table$task_score <- unlist(map(SymS_table$subject,calculate_inflation_score, average_other_circle, SymS_table))

SymS_table %>% head() %>% kable()
subject task_name circ_num circ_label circ_r task_RT task_score
1 SymS circle0 Me 124.65151 10051.6 14.540532
1 SymS circle1 mom 107.20075 10051.6 14.540532
1 SymS circle2 dad 127.56175 10051.6 14.540532
2 SymS circle0 Me 100.60318 18732.985 3.633101
2 SymS circle1 mom 99.62429 18732.985 3.633101
2 SymS circle2 dad 97.94897 18732.985 3.633101

PC table - Pen Choice Table

  • maybe most should go into a separate table
PC_table <- raw_d %>% filter(
  grepl("pen_choice", stimulus)
) %>% 
  select(subject, button_pressed, unique_color, unique_position, rt) %>% 
  mutate(
    task_name = "PC", 
    button_pressed = as.numeric(button_pressed), 
    unique_choice = case_when(
      button_pressed == unique_position ~ "yes", 
      TRUE ~ "no"
    )
  )
  
PC_table %>% head() %>% kable()
subject button_pressed unique_color unique_position rt task_name unique_choice
1 4 yellow 3 3457 PC no
2 4 yellow 2 2575 PC no
3 3 green 1 3615 PC no
4 4 green 3 2295 PC no