Read in raw trial data

# Load the raw participant trial files, one per pilot and age band, tagging
# each with its experiment number (`exp`) and an `age_group` label so the
# tables can be stacked later.

# Pilot 1 (exp = 1)
p1_young <- mutate(
  read.csv("/Documents/GRADUATE_SCHOOL/Projects/next_kids/experiments/nextKids_pilot1/data/next/3-4yo/participant_data_34.csv"),
  exp = 1,
  age_group = "young"
)
p1_mid <- mutate(
  read.csv("/Documents/GRADUATE_SCHOOL/Projects/next_kids/experiments/nextKids_pilot1/data/next/5-6yo/participant_data_56.csv"),
  exp = 1,
  age_group = "mid"
)

# Pilot 2 (exp = 2)
p2_young <- mutate(
  read.csv("/Documents/GRADUATE_SCHOOL/Projects/next_kids/experiments/nextKids_pilot2/data/3-4yo/participant_data_34.csv"),
  exp = 2,
  age_group = "young"
)
p2_mid <- mutate(
  read.csv("/Documents/GRADUATE_SCHOOL/Projects/next_kids/experiments/nextKids_pilot2/data/5-6yo/participant_data_56.csv"),
  exp = 2,
  age_group = "mid"
)
p2_old <- mutate(
  read.csv("/Documents/GRADUATE_SCHOOL/Projects/next_kids/experiments/nextKids_pilot2/data/7-8yo/participant_data_78.csv"),
  exp = 2,
  age_group = "old"
)

Merge and munge

# Return the second piece of each element of `x` after splitting on the regex
# `pattern` (NA when there is no second piece). Always yields a character
# vector, so no list-columns are created along the way.
second_field <- function(x, pattern) {
  vapply(
    str_split(as.character(x), pattern),
    function(parts) parts[2],
    character(1)
  )
}

# Stack all pilots, keep only UncertaintySampling trials, and reduce each row
# to: experiment, participant id, age group and the four image names.
#  - subid is the second "_"-delimited field of participant_uid.
#  - Image columns hold URLs; the piece between ".com/" and ".jpg" is kept.
#    The dots are escaped so they match literal dots only.
#  - age_group levels are ordered young, mid, (old) for plotting/tables.
#  - One participant is excluded by id at the end of the pipeline.
d <- rbind(p1_young, p1_mid, p2_young, p2_mid, p2_old) %>%
  filter(alg_id == "UncertaintySampling") %>%
  mutate(subid = as.factor(second_field(participant_uid, "_"))) %>%
  select(
    exp, subid, age_group,
    target_center, target_left, target_right, target_winner
  ) %>%
  mutate(
    target_center = as.factor(second_field(target_center, "\\.com/|\\.jpg")),
    target_left = as.factor(second_field(target_left, "\\.com/|\\.jpg")),
    target_right = as.factor(second_field(target_right, "\\.com/|\\.jpg")),
    target_winner = as.factor(second_field(target_winner, "\\.com/|\\.jpg")),
    age_group = fct_relevel(age_group, "young", "mid")
  ) %>%
  filter(subid != "7smVlvhKMXzrayO8MtwFKuo23EWP9N")

Demographics

Number of participants

# Participants per experiment x age group: keep the first trial row of every
# participant, then count the remaining rows within each cell.
d %>%
  group_by(exp, age_group, subid) %>%
  slice(1) %>%
  group_by(exp, age_group) %>%
  summarise(n = n()) %>%
  kable()
exp age_group n
1 young 5
1 mid 3
2 young 12
2 mid 8
2 old 3

Number of trials

# Trials per experiment x age group (every row of `d` is one trial).
kable(
  summarise(
    group_by(d, exp, age_group),
    n = n()
  )
)
exp age_group n
1 young 25
1 mid 15
2 young 223
2 mid 163
2 old 56

Munge data for python

Convert names to numeric (for python)

# One-off generation of the image-name -> integer lookup, kept for reference.
# The saved file is read back instead so the numbering stays stable.
#images = unique(d$i)
#image_dict = data.frame(images = images, image_num = 0:(length(images)-1))
#write.csv(image_dict, "image_dict.csv", row.names = F)
image_dict <- read.csv("image_dict.csv")

# Recode each trial as a triplet (i, j, k):
#   i = the center image,
#   k = target_winner,
#   j = whichever of left/right is NOT the winner.
# The left/winner comparison is done on character values: `==` on two factors
# errors when their level sets differ.
d <- d %>%
  mutate(
    i = target_center,
    j = ifelse(
      as.character(target_left) == as.character(target_winner),
      as.character(target_right),
      as.character(target_left)
    ),
    k = target_winner
  )

# Map image names to their integer codes in `image_dict` (NA when a name is
# not in the dictionary). Names are compared as character so factor and
# character inputs behave the same.
to_image_num <- function(img) {
  image_dict$image_num[
    match(as.character(img), as.character(image_dict$images))
  ]
}

# Numeric triplet table: exp, age_group, then i/j/k as image numbers.
# A single lookup replaces three join/select/rename rounds; row count and
# column order are unchanged.
d_numeric <- d %>%
  select(exp, age_group, i, j, k) %>%
  mutate(
    i = to_image_num(i),
    j = to_image_num(j),
    k = to_image_num(k)
  )

Save for python

# Write the numeric triplets without row names; FALSE is spelled out because
# `F` is an ordinary variable that can be reassigned.
write.csv(d_numeric, "triplet_data.csv", row.names = FALSE)

Run the Jupyter notebook get_embeddings.ipynb (computeEmbeddings, Python) to compute the embeddings: jupyter notebook get_embeddings.ipynb.

Embeddings

Uncertainty E1 and E2

# Embeddings fitted on uncertainty-sampling trials (E1 and E2).
# Column `X.1` is renamed to `image_num` (presumably the image index written
# by the notebook -- confirm), age groups are ordered young, mid, (old), and
# the image dictionary is attached via a natural join on shared columns.
es <- read.csv("all_embeddings_uncertainty.csv")
es <- rename(es, image_num = X.1)
es <- mutate(es, age_group = fct_relevel(age_group, "young", "mid"))
es <- left_join(es, image_dict)

Points

# Embedding scatter, one panel per age group. Colour encodes the `type`
# column and point shape the `theme` column; gridlines and all axis
# decorations are hidden, and the legend sits below the panels.
ggplot(data = es, mapping = aes(x = X, y = Y, colour = type, shape = theme)) +
  facet_wrap(~ age_group) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    strip.background = element_rect(colour = "grey", fill = "grey"),
    strip.text.x = element_text(size = 14),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

Labels

# Same embedding, but each item is drawn as its image name (`images`) rather
# than a point; one panel per age group, no gridlines or axis decorations.
ggplot(data = es, mapping = aes(x = X, y = Y)) +
  facet_wrap(~ age_group) +
  geom_text(mapping = aes(label = images), size = 3) +
  theme_bw() +
  theme(
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    strip.background = element_rect(colour = "grey", fill = "grey"),
    strip.text.x = element_text(size = 14),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

All E1 and E2

# Embeddings from "all_embeddings_all.csv" (E1 and E2): rename `X.1` to
# `image_num`, order the age groups young, mid, (old), then attach the image
# dictionary via a natural join on shared columns.
es <- read.csv("all_embeddings_all.csv")
es <- rename(es, image_num = X.1)
es <- mutate(es, age_group = fct_relevel(age_group, "young", "mid"))
es <- left_join(es, image_dict)

Points

# Point view of the current `es` embedding: colour = `type`, shape = `theme`,
# faceted by age group, with a bare panel (no grid, axis text, titles, ticks)
# and the legend underneath.
ggplot(data = es, mapping = aes(x = X, y = Y, colour = type, shape = theme)) +
  facet_wrap(~ age_group) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(colour = "grey", fill = "grey"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

Labels

# Text-label view of the current `es` embedding: each item is printed as its
# `images` name at its (X, Y) position, faceted by age group.
ggplot(data = es, mapping = aes(x = X, y = Y)) +
  facet_wrap(~ age_group) +
  geom_text(mapping = aes(label = images), size = 3) +
  theme_bw() +
  theme(
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(colour = "grey", fill = "grey"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

Uncertainty E2

# Embeddings from "all_embeddings_2_uncertainty.csv" (E2, uncertainty
# sampling): rename `X.1` to `image_num`, order the age groups, and attach
# the image dictionary via a natural join on shared columns.
es <- read.csv("all_embeddings_2_uncertainty.csv")
es <- rename(es, image_num = X.1)
es <- mutate(es, age_group = fct_relevel(age_group, "young", "mid"))
es <- left_join(es, image_dict)

Points

# Scatter of the E2 uncertainty embedding held in `es`, split by age group.
# `type` drives colour and `theme` drives shape; the panel is stripped of
# gridlines and axis furniture, with the legend at the bottom.
ggplot(data = es, mapping = aes(x = X, y = Y, colour = type, shape = theme)) +
  facet_wrap(~ age_group) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(colour = "grey", fill = "grey"),
    strip.text.x = element_text(size = 14),
    legend.position = "bottom"
  )

Labels

# Labelled version of the E2 uncertainty embedding: image names (`images`)
# are drawn at their coordinates, one panel per age group.
ggplot(data = es, mapping = aes(x = X, y = Y)) +
  facet_wrap(~ age_group) +
  geom_text(mapping = aes(label = images), size = 3) +
  theme_bw() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(colour = "grey", fill = "grey"),
    strip.text.x = element_text(size = 14)
  )

Next Uncertainty E2

# Read the pilot-2 embedding exports (JSON stored in .txt files), one per age
# band. Each is flattened to a plain data frame, reduced to the coordinate
# column (`darray`) and the image description, and tagged with exp/age_group.
p2e_young <- fromJSON("/Documents/GRADUATE_SCHOOL/Projects/next_kids/experiments/nextKids_pilot2/data/3-4yo/embedding_34_uncertainty.txt") %>%
  flatten() %>%
  select(darray, target.alt_description) %>%
  mutate(exp = 2, age_group = "young")

p2e_mid <- fromJSON("/Documents/GRADUATE_SCHOOL/Projects/next_kids/experiments/nextKids_pilot2/data/5-6yo/embedding_56_uncertainty.txt") %>%
  flatten() %>%
  select(darray, target.alt_description) %>%
  mutate(exp = 2, age_group = "mid")

p2e_old <- fromJSON("/Documents/GRADUATE_SCHOOL/Projects/next_kids/experiments/nextKids_pilot2/data/7-8yo/embedding_78_uncertainty.txt") %>%
  flatten() %>%
  select(darray, target.alt_description) %>%
  mutate(exp = 2, age_group = "old")

# Combine the three age-band embeddings into one plotting table.
#  - `images`: the part of the description before ".jpg".
#  - `darray` is split on "," into X and Y; the "c(" / ")" wrappers are then
#    stripped before converting to numeric (suggests darray arrives as a list
#    of coordinate pairs -- confirm).
#  - age_group is a factor ordered young, mid, (old).
#  - image metadata is attached via a natural join; image_num is then dropped.
next_embed <- rbind(p2e_young, p2e_mid, p2e_old) %>%
  mutate(
    images = unlist(lapply(
      str_split(target.alt_description, ".jpg"),
      function(parts) parts[1]
    ))
  ) %>%
  select(-target.alt_description) %>%
  separate(darray, into = c("X", "Y"), sep = ",") %>%
  mutate(
    X = as.numeric(gsub("c\\(", "", X)),
    Y = as.numeric(gsub("\\)", "", Y)),
    age_group = fct_relevel(as.factor(age_group), "young", "mid"),
    images = as.factor(images)
  ) %>%
  left_join(image_dict) %>%
  select(-image_num)

Points

# Scatter of the embedding in `next_embed`, faceted by age group; colour
# follows `type`, shape follows `theme`. Gridlines and axis decorations are
# removed and the legend is placed below the panels.
ggplot(
  data = next_embed,
  mapping = aes(x = X, y = Y, colour = type, shape = theme)
) +
  facet_wrap(~ age_group) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(colour = "grey", fill = "grey")
  )

Labels

# Labelled view of the embedding in `next_embed`: each image name (`images`)
# is drawn at its (X, Y) position, one panel per age group, with gridlines
# and axis decorations hidden.
# The trailing comma after the last theme() argument was removed: it passes
# an empty argument, which some ggplot2 versions reject.
ggplot(next_embed, aes(x = X, y = Y)) +
  geom_text(aes(label = images), size = 3) +
  facet_wrap(~age_group) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.text.x = element_text(size = 14),
    strip.background = element_rect(colour = "grey", fill = "grey"),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )