TED norming study 2 - stimulus selection

Based on TED2, determine (1) how many more participants to run, (2) which current items to include, and (3) whether there are new items we should create. Note we’re only using building time and visual complexity here.

# Raw norming data, read from the working directory.
d <- read.csv("KE2_A.csv")

# Reshape the raw data to long format.
#
# Every column whose name contains "_" is stacked, and its name is split on
# "_" into a prefix (kept as `variable`) and a suffix (kept as `trial_num`).
# Spreading on the prefix then yields one column per prefix; the code below
# relies on at least `rating` and `obj` being among them.
# Stacking mixes the rating and object values in a single column, which is
# presumably why `rating` has to be converted back to numeric afterwards.
d.long <- d %>%
  gather(key = variable, value = value, contains("_")) %>%
  mutate(
    trial_num = unlist(lapply(strsplit(as.character(variable), "_"),
                              function(parts) parts[2])),
    variable = unlist(lapply(strsplit(as.character(variable), "_"),
                             function(parts) parts[1]))
  ) %>%
  spread(key = variable, value = value) %>%
  mutate(
    trial_num = as.numeric(trial_num),
    rating = as.numeric(rating),
    obj = as.factor(obj),
    # Short object label: the part of `obj` before the first "_F", with all
    # punctuation stripped.
    obj_lab = unlist(lapply(strsplit(as.character(obj), "_F"),
                            function(parts) parts[1])),
    obj_lab = gsub("[[:punct:]]", "", obj_lab)
  ) %>%
  # Only these two rating conditions are analysed in this report.
  filter(condition %in% c("building_time", "visual_complexity"))

Number of participants

# Count participants per condition: keep one row per participant id
# (the first one encountered), then tabulate by condition.
d.long %>%
  distinct(subids, .keep_all = TRUE) %>%
  count(condition) %>%
  kable()

condition	n
building_time	20
visual_complexity	20

Correlation

Between building time and visual complexity only.

images

# Per-object summary of `rating` within each condition.
# The plots that consume this table use its `mean`, `ci_lower` and `ci_upper`
# columns (presumably a bootstrapped mean and confidence interval -- confirm
# against the multi_boot_standard() documentation).
condition.ratings <- d.long %>%
  group_by(condition, obj) %>%
  multi_boot_standard(column = "rating")

# Scatter plot of per-object mean ratings (building time on x, visual
# complexity on y) with a linear fit; each object is drawn as an image.
condition.ratings %>%
  select(-ci_lower, -ci_upper) %>%
  # One row per object, one mean-rating column per condition.
  spread(key = condition, value = mean) %>%
  # geom_image() loads its images from these paths.
  mutate(obj = paste0("thumbnails/", obj)) %>%
  ggplot(aes(x = building_time, y = visual_complexity)) +
  geom_smooth(method = lm) +
  geom_image(aes(image = obj), size = .04, by = "width") +
  xlab("building time") +
  ylab("visual complexity") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = .3, hjust = 1)
  )

labels

# Same scatter plot as the image version, but each object is drawn as a text
# label instead of a picture.
condition.ratings %>%
  select(-ci_lower, -ci_upper) %>%
  # One row per object, one mean-rating column per condition.
  spread(key = condition, value = mean) %>%
  mutate(
    # Short object label: the part of `obj` before the first "_F", with all
    # punctuation stripped.
    obj_lab = unlist(lapply(strsplit(as.character(obj), "_F"),
                            function(parts) parts[1])),
    obj_lab = gsub("[[:punct:]]", "", obj_lab)
  ) %>%
  ggplot(aes(x = building_time, y = visual_complexity)) +
  geom_smooth(method = lm) +
  geom_label(aes(label = obj_lab)) +
  xlab("building time") +
  ylab("visual complexity") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = .3, hjust = 1)
  )

error bars

# Scatter plot of per-object mean ratings with confidence intervals on both
# axes: vertical ranges for visual complexity, horizontal bars for building
# time.
condition.ratings %>%
  # Stack mean / ci_lower / ci_upper, then build "<condition>_<statistic>"
  # column names so each object ends up on a single row.
  gather(variable, value, -c(condition, obj)) %>%
  unite(temp, condition, variable) %>%
  spread(temp, value) %>%
  ggplot(aes(y = visual_complexity_mean, x = building_time_mean)) +
  geom_smooth(method = lm) +
  geom_pointrange(aes(ymin = visual_complexity_ci_lower,
                      ymax = visual_complexity_ci_upper), size = .5) +
  # xmin is the lower bound and xmax the upper bound (they were swapped).
  # height is a constant, so it is set outside aes(); 0 draws no end caps.
  geom_errorbarh(aes(xmin = building_time_ci_lower,
                     xmax = building_time_ci_upper), height = 0) +
  # NOTE(review): hard-coded axis limits; ggplot2 drops observations that fall
  # outside scale limits, so re-check these if the data change.
  xlim(0, .85) +
  ylim(0, .9) +
  ylab("visual complexity") +
  xlab("building time") +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 90, vjust = .3, hjust = 1))

Difference scores

Triangle shape indicates difference is significantly different from zero.

Difference scores

# Per-object comparison of the two conditions: a two-sample t-test of rating
# by condition is run for every object and its tidied result is unpacked into
# plain columns.
# `diff` is the test's estimated difference in means; the axis label of the
# plot built from this table describes it as BT - VC (presumably because
# "building_time" sorts before "visual_complexity" -- confirm).
diff.df <- d.long %>%
  group_by(obj) %>%
  do(te = tidy(t.test(rating ~ condition, data = .))) %>%
  mutate(
    diff = te$estimate,
    ci_lower = te$conf.low,
    ci_upper = te$conf.high,
    p = te$p.value,
    # Short object label: the part of `obj` before the first "_F", with all
    # punctuation stripped.
    obj_lab = unlist(lapply(strsplit(as.character(obj), "_F"),
                            function(parts) parts[1])),
    obj_lab = gsub("[[:punct:]]", "", obj_lab)
  ) %>%
  select(-te) %>%
  arrange(diff) %>%
  # Flag objects whose difference is significant at the .05 level.
  mutate(sig = ifelse(p < .05, "sig", ""))

# Per-object rating difference with its confidence interval, objects ordered
# by the size of the difference; each object's image is drawn above its
# point. Point shape distinguishes significant from non-significant items.
diff.df %>%
  # The table comes out of do(), which can leave it row-wise; drop that so
  # n() and fct_reorder() below see the whole table rather than one row.
  ungroup() %>%
  mutate(obj_lab = fct_reorder(obj_lab, diff),
         obj = paste0("thumbnails/", obj),
         # Stagger the images over three heights so neighbours do not
         # overlap. rep_len() recycles to exactly n() values, so this also
         # works when the number of objects is not a multiple of 3.
         pic_height = rep_len(c(.4, .45, .5), n())) %>%
  ggplot(aes(x = reorder(obj_lab, diff), y = diff)) +
  geom_image(aes(image = obj, y = pic_height), size = .04, by = "width") +
  # Reference line at zero difference.
  geom_hline(yintercept = 0, color = "red") +
  geom_pointrange(aes(ymin = ci_lower,
                      ymax = ci_upper, shape = sig), size = .6) +
  xlab("object name") +
  ylab("rating difference (BT-VC) \n higher -> more complex in terms of BT than VC" ) +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 90, vjust = .3, hjust = 1))

Difference scores - scaled

This is the same as the previous plot, but I scaled the ratings first (z-scored within each object, pooling both conditions) before calculating item means and difference scores. This doesn’t make a huge difference but might be the correct way to do this?

# Z-score a numeric vector: subtract its mean, then divide by its standard
# deviation. Missing values are ignored when computing the mean and the
# standard deviation, and stay missing in the result.
scale_this <- function(x) {
  centered <- x - mean(x, na.rm = TRUE)
  centered / sd(x, na.rm = TRUE)
}

# Same per-object comparison as the unscaled version, run on ratings that
# were first z-scored within each object (both conditions pooled).
# NOTE(review): z-scoring within an object is an affine transformation of
# that object's ratings, so each t-test's p-value is identical to the
# unscaled analysis; only `diff` and its interval are rescaled by the
# object's pooled SD. Confirm this is the intended scaling.
diff.scaled.df <- d.long %>%
  group_by(obj) %>%
  mutate(rating.scale = scale_this(rating)) %>%
  do(te = tidy(t.test(rating.scale ~ condition, data = .))) %>%
  mutate(
    diff = te$estimate,
    ci_lower = te$conf.low,
    ci_upper = te$conf.high,
    p = te$p.value,
    # Short object label: the part of `obj` before the first "_F", with all
    # punctuation stripped.
    obj_lab = unlist(lapply(strsplit(as.character(obj), "_F"),
                            function(parts) parts[1])),
    obj_lab = gsub("[[:punct:]]", "", obj_lab)
  ) %>%
  select(-te) %>%
  arrange(diff) %>%
  # Flag objects whose difference is significant at the .05 level.
  mutate(sig = ifelse(p < .05, "sig", ""))
 
# Per-object difference in scaled ratings with its confidence interval,
# objects ordered by the size of the difference; each object's image is drawn
# above its point. Point shape distinguishes significant from
# non-significant items.
diff.scaled.df %>%
  # The table comes out of do(), which can leave it row-wise; drop that so
  # n() and fct_reorder() below see the whole table rather than one row.
  ungroup() %>%
  mutate(obj_lab = fct_reorder(obj_lab, diff),
         obj = paste0("thumbnails/", obj),
         # Stagger the images over three heights so neighbours do not
         # overlap. rep_len() recycles to exactly n() values, so this also
         # works when the number of objects is not a multiple of 3.
         pic_height = rep_len(c(1.5, 1.7, 1.9), n())) %>%
  ggplot(aes(x = reorder(obj_lab, diff), y = diff)) +
  geom_image(aes(image = obj, y = pic_height), size = .04, by = "width") +
  # Reference line at zero difference.
  geom_hline(yintercept = 0, color = "red") +
  geom_pointrange(aes(ymin = ci_lower,
                      ymax = ci_upper, shape = sig), size = .6) +
  xlab("object name") +
  ylab("rating difference (BT-VC) \n higher -> more complex in terms of BT than VC" ) +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 90, vjust = .3, hjust = 1))

TED norming study 2 - stimulus selection

Molly Lewis

2017-05-03

Number of participants

Correlation

images

labels

error bars

Difference scores

Difference scores

Difference scores - scaled