Question:
over-complicated pipelines?
how many coders? -> structure of the pipelines
all
all
reading data
# Read the US and CN input CSVs; both paths are constants defined elsewhere.
us_d <- read.csv(file = US_TBA_PATH)
cn_d <- read.csv(file = CN_TBA_PATH)
# Build the blank coding form for the "FD" task rows of one culture's data.
#
# d: data frame that has the columns task_name, stimulus, trial_raw, X and
#   trial_num (all of them are referenced below).
# culture_label: value stored in the `culture` column, e.g. "US" or "CN".
#
# Returns the FD rows sorted by stimulus, with `stimulus` and `trial_raw`
# converted to character, empty `coder` / `codeable` columns, four scoring
# columns initialised to 0, and the X and trial_num columns dropped.
build_fd_form <- function(d, culture_label) {
  d %>%
    filter(task_name == "FD") %>%
    mutate(
      stimulus = as.character(stimulus),
      trial_raw = as.character(trial_raw),
      culture = culture_label
    ) %>%
    # Sort after the character conversion so the order is alphabetical.
    arrange(stimulus) %>%
    mutate(
      coder = "",
      codeable = "",
      first_mention_focal = 0,
      first_mention_backgrd = 0,
      descriptive_focal = 0,
      descriptive_backgrd = 0
    ) %>%
    select(-X, -trial_num)
}

us_fd_form <- build_fd_form(us_d, "US")
# Currently decided not to parse the keywords because doing so is not very
# efficient and does not significantly reduce the lengths.
# system.time(us_fd_form$trial_parsed <- lapply(us_fd_form$trial_raw, parse_key_words))
us_fd_form %>% datatable()

cn_fd_form <- build_fd_form(cn_d, "CN")
cn_fd_form %>% datatable()

# Save both forms to disk.
write.csv(us_fd_form, US_FD_PATH)
write.csv(cn_fd_form, CN_FD_PATH)
# Build the blank coding form for the "CA" task rows of one culture's data.
#
# d: data frame that has the columns task_name, X and trial_num.
# culture_label: value stored in the `culture` column, e.g. "US" or "CN".
#
# Returns the CA rows with the culture label, empty `codeable`,
# `attribution` and `coder` columns, and the X and trial_num columns dropped.
build_ca_form <- function(d, culture_label) {
  d %>%
    filter(task_name == "CA") %>%
    mutate(
      culture = culture_label,
      codeable = "",
      attribution = "",
      coder = ""
    ) %>%
    select(-X, -trial_num)
}

us_ca_form <- build_ca_form(us_d, "US")
us_ca_form %>% datatable()

cn_ca_form <- build_ca_form(cn_d, "CN")
cn_ca_form %>% datatable()

# Save both forms to disk.
write.csv(us_ca_form, US_CA_PATH)
write.csv(cn_ca_form, CN_CA_PATH)
Here we only focus on the cases where circle num >= label num, because the cases where circle num < label num are already thrown out in exclude_task.R.
# Read the merged data; readr reports the column specification it guessed.
syms_df <- merged_PATH %>%
  read_csv()
## Parsed with column specification:
## cols(
## .default = col_character(),
## trial_index = col_double(),
## time_elapsed = col_double(),
## correct = col_logical(),
## passed = col_logical(),
## activate = col_logical(),
## startTime = col_double(),
## endTime = col_double(),
## RT = col_double(),
## answer_correct = col_double(),
## unique_position = col_double(),
## response = col_double()
## )
## See spec(...) for full column specifications.
# Per-participant circle counts and label counts, computed from the same data.
num_circ <- syms_df %>% count_circle_number()
num_lab <- syms_df %>% count_label_numbers()
# Attach the label counts to the circle counts, matching rows on `subject`.
num_sum <- num_circ %>%
  left_join(num_lab, by = "subject")
# Extract the labels once and split the result by culture, instead of
# running extract_label() on the same data twice.
all_lab_check <- extract_label(syms_df)
us_lab_check <- all_lab_check %>% filter(culture == "US")
cn_lab_check <- all_lab_check %>% filter(culture == "CN")

# Apply check_basic_label() to every label_all entry, passing the culture
# code as its second argument. lapply() is kept so that has_basic_label
# remains a list column, exactly as before.
us_lab_check$has_basic_label <- lapply(
  us_lab_check$label_all, check_basic_label, "US"
)
cn_lab_check$has_basic_label <- lapply(
  cn_lab_check$label_all, check_basic_label, "CN"
)
# Build the form of labels that still need a human check for one culture.
#
# lab_check: data frame that has the columns has_basic_label and circ_label.
#
# Returns the rows whose has_basic_label is FALSE, with empty `codeable` and
# `coder` columns added and the circ_label / has_basic_label columns dropped.
build_label_check_form <- function(lab_check) {
  lab_check %>%
    # Keep the explicit `== FALSE`: has_basic_label is filled with lapply(),
    # so it is a list column, and `!` cannot be applied to a list.
    filter(has_basic_label == FALSE) %>%
    mutate(
      codeable = "",
      coder = ""
    ) %>%
    select(
      -circ_label,
      -has_basic_label
    )
}

us_human_check_label <- build_label_check_form(us_lab_check)
cn_human_check_label <- build_label_check_form(cn_lab_check)

us_human_check_label %>% datatable()
cn_human_check_label %>% datatable()

# Save both forms to disk.
write.csv(us_human_check_label, US_SYMS_LABEL_PATH)
write.csv(cn_human_check_label, CN_SYMS_LABEL_PATH)
# Only check when the number of circles is greater than the number of labels,
# after getting rid of r = 0 and r = 1.
more_circ_sub <- num_sum %>%
  filter(circle_n > label_n) %>%
  pull(subject)

# Keep the draw-circles trials of those subjects (the ones that need human
# eyes) and pass them through extract_circle() to prepare for visualizing.
more_circ_check <- syms_df %>%
  filter(trial_type == "draw-circles") %>%
  filter(subject %in% more_circ_sub) %>%
  extract_circle()
# Form for a human to read and fill in: one row per unique
# (subject, locations) pair, plus three empty columns.
to_check_form <- more_circ_check %>%
  # distinct() with named columns already returns only those columns.
  distinct(subject, locations) %>%
  mutate(codeable = "", new_locations = "", coder = "")
# Draw one circle/label plot per subject and save each one as a PNG file.
#
# subjects: vector of subject ids; each distinct id is rendered once.
# df: data frame that has the columns subject, x, y, radius and label.
#
# Each file is written to paste0(SYMS_DIR, <id>, ".png"); SYMS_DIR is a
# constant defined elsewhere. Called for its side effect only.
# NOTE(review): geom_circle() is not part of ggplot2 itself (presumably it
# comes from ggforce) - confirm that package is attached.
save_image <- function(subjects, df) {
  # unique() avoids rendering and overwriting the same file more than once.
  for (id in unique(subjects)) {
    d_circle <- df %>%
      filter(subject == id)
    f_name <- paste0(SYMS_DIR, as.character(id), ".png")
    circle_plot <- d_circle %>%
      ggplot() +
      geom_circle(aes(x0 = x, y0 = y, r = radius)) +
      geom_text(aes(x = x, y = y, label = label)) +
      coord_fixed(xlim = c(0, 1024), ylim = c(0, 800)) +
      scale_y_reverse()
    # Pass the plot explicitly rather than relying on ggsave()'s default of
    # last_plot(), which is hidden global state.
    ggsave(f_name, plot = circle_plot)
  }
}
#save_image(to_check_form$subject, more_circ_check)
# From the circle-check visualization:
#   if codeable:
#     circles dimension, assert(number of circles == number of labels)
#   if not codeable: add to exclusion
# From the label visualization:
#   if acceptable, nothing
#   if not acceptable, add to exclusion
# make sure all are excluded
# make sure human checkers said yes to the labels
# make sure the number of labels matches the number of circles
# calculate score!
# Show the circle-check form, then write it to CIRCLE_FORM_PATH.
datatable(to_check_form)
write.csv(x = to_check_form, file = CIRCLE_FORM_PATH)
For preview purposes, this is what one of the figures looks like:
num_id <- to_check_form[["subject"]]
# Preview: the same circle/label drawing that save_image() produces, shown
# here as one panel per subject.
ggplot(data = more_circ_check) +
  geom_circle(mapping = aes(x0 = x, y0 = y, r = radius)) +
  geom_text(mapping = aes(x = x, y = y, label = label)) +
  facet_wrap(~subject) +
  scale_y_reverse() +
  coord_fixed(xlim = c(0, 1024), ylim = c(0, 800))