INTRO and Todo

The scripts contained in this markdown should in principle satisfy the following criteria:

PATH <- here("anonymized_All.csv")
raw_d <- read.csv(PATH)

FD table : Free Description Table

Todo: automatic score conversion?

fd_response <- raw_d %>% filter(
  trial_type == "free-description-response") %>% 
  filter((str_length(as.character(responses))) > 19) %>% # get rid of shorter response
  select(responses, rt) 



fd_stimulus <- raw_d %>% filter(
  trial_type == "image-keyboard-response" & str_detect(stimulus, "free_description") 
) %>% select(subject, stimulus) 

fd_stimulus$trial_num <- sequence(rle(fd_stimulus$subject)$lengths) #add trial num for each participant

fd_table <- bind_cols(fd_stimulus, fd_response) %>% 
            mutate(task_name = "FD", 
                   trial_RT = rt) %>%  
            rowwise() %>% 
            mutate(trial_raw = 
                  unlist(fromJSON(as.character(responses)))) %>% 
  select(subject, task_name, stimulus, trial_num,trial_raw, rt)

fd_table %>% head() %>%  kable()

subject	task_name	stimulus	trial_num	trial_raw	rt
1	FD	images/free_description/01-bird.jpg	1	A bird standing on the rock	6205.185
1	FD	images/free_description/05-cow-black.jpg	2	A weird looking animal in front of the trees	7610.06
1	FD	images/free_description/04-car.jpg	3	I forgot ……………..lol	6364.65
1	FD	images/free_description/08-fish.jpg	4	A fish in front of something yellow	15905.93
1	FD	images/free_description/06-bike.jpg	5	hahahahahahahahahha	4154.55
1	FD	images/free_description/02-boat-castle.jpg	6	boat????lalalalala	6584.525

CA table - Causal Attribtution Table

Todo: score calculating

CA_table <- raw_d %>% 
  filter(trial_type == "attribution-Q1") %>% 
  rowwise() %>% 
  mutate(
    task_name = "CA",
    trial_RT = rt, 
    trial_raw = unlist(fromJSON(as.character(responses)))
  ) %>% 
  select(subject, task_name, trial_RT, trial_raw)
  
CA_table$trial_num <- sequence(rle(CA_table$subject)$lengths)
CA_table %>% head() %>%  kable()

subject	task_name	trial_RT	trial_raw	trial_num
1	CA	13359.37	Because she’s scared	1
1	CA	8947.505	Because it’s dangerous	2
2	CA	19951.755	because the bicycle is dangerous to play on	1
2	CA	12144.275	because the bicycle is dangerous to play on	2
3	CA	50499.88	both are
possible!	1
3	CA	16859.85	same as before	2

EB table - Ebbinghaus table

Todo: calculate the key measurement messy data structure – individual and collected together

EB_table <- raw_d %>%
  filter(trial_type == "ebbinghaus-keyboard") %>% separate(stimulus, into = c("path_a","path_b","stim_name"),sep = "/") %>%
  separate(stim_name, into = c("stim_id", "trial_stim_left", "trial_stim_right"), sep = "_") %>% select(subject, key_press, rt, answer_correct, stim_id, trial_stim_left, trial_stim_right) %>% 
  mutate(
    task_name = "EB",
    block_type = case_when(
      stim_id == "N" ~ "NC", 
      TRUE ~ "IL"
    ), 
    trial_stim_right = gsub(".jpg","", trial_stim_right), 
    trial_key_pressed = case_when(
      key_press == 77 ~ "m", 
      TRUE ~ "z"
    ), 
    trial_correct = answer_correct,
    temp_id = paste(subject, block_type), 
    trial_RT = rt
  )

EB_table$trial_num <- sequence(rle(EB_table$temp_id)$lengths)
EB_table_summarize <- EB_table %>% 
  group_by(subject,block_type) %>% 
  summarise(
    block_RT = mean(as.numeric(trial_RT)),
    block_correct = mean(as.numeric(trial_correct))
  ) %>% 
  mutate(temp_id = paste(subject, block_type))

## `summarise()` regrouping output by 'subject' (override with `.groups` argument)

EB_table <- left_join(EB_table, EB_table_summarize, by = "temp_id") %>% 
  mutate(subject = subject.x,
         block_type = block_type.x) %>% 
  select(subject, task_name, block_type, block_RT, block_correct, trial_num, trial_stim_left, trial_stim_right, trial_key_pressed, trial_RT, trial_correct)

EB_table %>% head() %>%  kable(full_width = F)

subject	task_name	block_type	block_RT	block_correct	trial_num	trial_stim_left	trial_stim_right	trial_key_pressed	trial_RT	trial_correct
1	EB	NC	179.9	0.5	1	106	100	m	1050.05	0
1	EB	NC	179.9	0.5	2	100	110	z	388.265	0
1	EB	NC	179.9	0.5	3	98	100	m	759.36	1
1	EB	NC	179.9	0.5	4	100	86	z	390.715	1
1	EB	NC	179.9	0.5	5	100	118	m	557.58	1
1	EB	NC	179.9	0.5	6	100	102	z	298.895	0

RMTS table

Todo: not sure about the conversion system right now

RMTS_cond_table <- raw_d %>% filter(trial_type == "RMTStrain") %>% 
  select(subject, left_object, right_object, activate) %>% 
  mutate(
    left_object = as.character(left_object), 
    right_object = as.character(right_object),
    condition = case_when(
      (left_object == right_object) && (activate == TRUE) ~ "same_actv",
      (left_object != right_object) && (activate == TRUE) ~ "diff_actv",
      (left_object == right_object) && (activate == FALSE) ~ "diff_actv", 
      (left_object != right_object) && (activate == FALSE) ~ "same_actv")
    ) %>% 
  group_by(subject) %>% 
  distinct(condition, .keep_all = TRUE) %>% 
  select(subject, condition)

RMTS_table <- raw_d %>% filter(trial_type == "RMTStest") %>% 
  select(subject, choice, sameSide, samePair, diffPair) %>% 
  left_join(RMTS_cond_table, by = "subject") %>% 
  mutate(
    task_name = "RMTS", 
    choice = as.character(choice), 
    sameSide = as.character(sameSide), 
    trial_choice = case_when(
      condition == "diff_actv" && choice == "rightTestButton" && sameSide == "R" ~ "obj", 
      condition == "diff_actv" && choice == "rightTestButton" && sameSide == "L" ~ "rel", 
      condition == "diff_actv" && choice == "leftTestButton" && sameSide == "R" ~ "rel", 
      condition == "diff_actv" && choice == "leftTestButton" && sameSide == "L" ~ "obj", 
      condition == "same_actv" && choice == "rightTestButton" && sameSide == "R" ~ "rel", 
      condition == "same_actv" && choice == "rightTestButton" && sameSide == "L" ~ "obj", 
      condition == "same_actv" && choice == "leftTestButton" && sameSide == "R" ~ "obj", 
      condition == "same_actv" && choice == "leftTestButton" && sameSide == "L" ~ "rel"
    )
  ) %>% 
  select(subject, task_name, trial_choice)

RMTS_table$trial_num <- sequence(rle(RMTS_table$subject)$lengths)

RMTS_table %>% head() %>% kable()

subject	task_name	trial_choice	trial_num
1	RMTS	obj	1
1	RMTS	obj	2
1	RMTS	obj	3
1	RMTS	obj	4
2	RMTS	obj	1
2	RMTS	obj	2

HZ table - Horizon sticker table

height or proportion of height?

# build a dictionary with file name as key and size as value 
sticker_dir = "preprocessing/material_stickers"
stickers = list.files(sticker_dir)

d <- dict()
for (s in stickers){
  img <- readPNG(file.path(sticker_dir, s)) 
  dim <- dim(img)
  size <- dim[[1]] * dim[[2]]
  src_name <- paste("images/sort/",s,sep = "")
  d[src_name] <- size[[1]]
}

# get all the trials with sticker 
sticker_d <- raw_d %>%filter(trial_type == "horizon-sticker")

#focus on the moved sticker for each participant 
moved_sticker <- sticker_d %>% 
  select(subject, rt, trial_type, moves,final_locations) %>% 
  toJSON() %>% 
  fromJSON() %>% 
  mutate(
    moves = as.character(moves),
    moves = map(moves, ~ fromJSON(.) %>% as.data.frame())) %>% 
  unnest(moves) %>% 
  group_by(subject) %>% 
  distinct(src, .keep_all = TRUE)



# helper function to get each sticker's dimension from dictionary
get_sticker_dim <- function(src, d){
  return (d[src][[1]])
}

# convert from src name to sticker dimension 
moved_sticker$size <- unlist(map(moved_sticker$src,get_sticker_dim, d))

# caculate total sticker areas for each subject 
sticker_area <- moved_sticker %>% 
  group_by(subject) %>% 
  summarize(
    sum_area = sum(size)
  )

## `summarise()` ungrouping output (override with `.groups` argument)

# function that map back to the dataframe
get_sticker_total_area <- function(id, caculated_df){
  area <- caculated_df %>% 
    filter(subject == id) %>% 
    select(sum_area) %>% 
    pull()
  return (area)
}

# add a column back to the raw dataframe 
moved_sticker$stkr_area <- unlist(map(moved_sticker$subject,get_sticker_total_area, sticker_area))

# calculate final height  
CANVAS_HEIGHT <-334.4

#focus on the moved sticker for each participant 
final_sticker <- sticker_d %>% 
  select(subject, rt, trial_type, moves,final_locations) %>% 
  toJSON() %>% 
  fromJSON() %>% 
  mutate(
    final_locations = as.character(final_locations),
    final_locations = map(final_locations, ~ fromJSON(.) %>% as.data.frame())) %>% 
  unnest(final_locations) %>% 
  filter(src == "images/sort/horizon_2.png") %>% 
  mutate(
    hz_height = CANVAS_HEIGHT - y
  ) %>% 
  select(subject, hz_height)

count_stkr <- moved_sticker %>% 
  group_by(subject) %>% 
  summarise(
    stkr_count = n()
  )

## `summarise()` ungrouping output (override with `.groups` argument)

stkr_df <- left_join(moved_sticker, final_sticker, by = "subject")
 
HZ_table <- left_join(stkr_df, count_stkr, by = "subject") %>% 
  mutate(task_name = "HZ") %>% 
  select(subject, task_name, rt, stkr_count, stkr_area, hz_height)

HZ_table %>% head() %>% kable()

subject	task_name	rt	stkr_count	stkr_area	hz_height
1	HZ	28978.42	4	321852	334.4
1	HZ	28978.42	4	321852	334.4
1	HZ	28978.42	4	321852	334.4
1	HZ	28978.42	4	321852	334.4
2	HZ	162270.145	31	1884274	242.4
2	HZ	162270.145	31	1884274	242.4

Syms Table - Symbolic Self-Inflation Table

Need to assume that circle & label can correspond to each other i.e. same number label & circle in that order, or weird thing happens

circle_table <- raw_d %>% 
  filter(trial_type == "draw-circles") %>% 
  select(subject, locations, rt) %>% 
  toJSON() %>% 
  fromJSON() %>% 
  mutate(
    circle_drawn = map(locations, ~ fromJSON(.) %>% as.data.frame())) %>% 
  unnest(circle_drawn) %>% 
  mutate(
    circ_num = label, 
    circ_r = radius, 
    task_RT = rt, 
    task_name = "SymS",
    temp_id = paste(subject,circ_num,sep="")
  ) %>% 
  select(subject, task_name, circ_num, circ_r, task_RT, task_name, temp_id)

label_table <- raw_d %>% 
  filter(trial_type == "circle-label") %>% 
  select(subject, responses) %>%
  toJSON() %>% 
  fromJSON() %>%
  mutate(
    circ_label = map(responses, ~ fromJSON(.) %>% as.data.frame())) %>% 
  unnest(circ_label) %>% 
  mutate(
    Q_Me = "Me"
    ) %>% 
  relocate(Q_Me,.before = Q0) %>% 
  pivot_longer(Q_Me:Q6, names_to = "label_names") %>% 
  filter(value != "") %>% 
  mutate(
    circ_label = value
    
  ) %>% 
  select(subject, circ_label)

label_table$circ_id <- sequence(rle(label_table$subject)$lengths)
label_table <- label_table %>% 
  mutate(
    temp_id = paste(subject, "circle", as.character(circ_id-1),sep = "")
  )


SymS_table <- left_join(label_table, circle_table, by = "temp_id") %>% 
  mutate(
    subject = subject.x
  ) %>% 
  select(subject, task_name, circ_num, circ_label, circ_r, task_RT)

# calculate the average non-self circle 
average_other_circle <- SymS_table %>% 
  filter(circ_num != "circle0") %>% 
  group_by(subject) %>% 
  summarise(
    mean = mean(circ_r*2)
  )

## `summarise()` ungrouping output (override with `.groups` argument)

calculate_inflation_score <- function(id, other_df, summarize_df){
  other_average <- other_df %>% 
    filter(subject == id) %>% 
    select(mean) %>% 
    pull()
  
  self_circle <- summarize_df %>% 
    filter(subject == id) %>% 
    filter(circ_num == "circle0") %>% 
    select(circ_r) %>% 
    pull() 
  
  return (self_circle * 2 - other_average)
}


SymS_table$task_score <- unlist(map(SymS_table$subject,calculate_inflation_score, average_other_circle, SymS_table))

SymS_table %>% head() %>% kable()

subject	task_name	circ_num	circ_label	circ_r	task_RT	task_score
1	SymS	circle0	Me	124.65151	10051.6	14.540532
1	SymS	circle1	mom	107.20075	10051.6	14.540532
1	SymS	circle2	dad	127.56175	10051.6	14.540532
2	SymS	circle0	Me	100.60318	18732.985	3.633101
2	SymS	circle1	mom	99.62429	18732.985	3.633101
2	SymS	circle2	dad	97.94897	18732.985	3.633101

PC table - Pen Choice Table

maybe most should go into a separate table

PC_table <- raw_d %>% filter(
  grepl("pen_choice", stimulus)
) %>% 
  select(subject, button_pressed, unique_color, unique_position, rt) %>% 
  mutate(
    task_name = "PC", 
    button_pressed = as.numeric(button_pressed), 
    unique_choice = case_when(
      button_pressed == unique_position ~ "yes", 
      TRUE ~ "no"
    )
  )
  
PC_table %>% head() %>% kable()

subject	button_pressed	unique_color	unique_position	rt	task_name	unique_choice
1	4	yellow	3	3457	PC	no
2	4	yellow	2	2575	PC	no
3	3	green	1	3615	PC	no
4	4	green	3	2295	PC	no

Data Preprocessing Preview Step2

anjie

9/4/2020