https://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html

Categories are flower names - we need similarity by shape and color

https://www.robots.ox.ac.uk/~vgg/data/flowers/102/categories.html

imagelabels.mat are the image labels/categories

distancematrices102.mat are the &Chi^2 distances from this pub https://www.robots.ox.ac.uk/~vgg/publications/2008/Nilsback08/

knitr::opts_chunk$set(warning = FALSE)
library(here) 
## here() starts at /Users/jessicamankewitz/projects/adaptive-agents
library(rmatio) #for reading .mat files
library(tidyverse) #for everything else
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(cowplot) #plotting the grids
library(ggplot2)
#try to read in labels

image_labels <- read.mat(here("stimuli_sets/flowers/imagelabels.mat"))
distances <- read.mat(here("stimuli_sets/flowers/distancematrices102.mat"))

flower_images <- list.files(here("stimuli_sets/flowers/images"))
D_siftint <- distances[1]
D_siftint.matrix <- as.matrix(D_siftint[[1]], nrows = 8189, ncols = 8189)

D_hsv <- distances[2]
D_hsv.matrix <- as.matrix(D_hsv[[1]], nrows = 8189, ncols = 8189)

D_siftbdy <- distances[3]
D_siftbdy.matrix <- as.matrix(D_siftbdy[[1]], nrows = 8189, ncols = 8189)

D_hog <- distances[4]
D_hog.matrix <- as.matrix(D_hog[[1]], nrows = 8189, ncols = 8189)

Get 11 most similar images for a seed image

get_similarity_df <- function(seed_image){
  image_index <- as.numeric(gsub(".jpg", "", (gsub("image_0", "", seed_image))))
  image_category <- image_labels[[1]][image_index]
  similarities.df <- data.frame(comp_image_file = flower_images,
                                comp_image = seq(1:8189+1), 
                                comp_category = image_labels[[1]], 
                                D_siftint = D_siftint.matrix[image_index,],
                                D_hsv = D_hsv.matrix[image_index,],
                                D_siftbdy = D_siftbdy.matrix[image_index,],
                                D_hog = D_hog.matrix[image_index, ]) %>%
  mutate(target_image = image_index, target_category = image_category)
  
  return(similarities.df)
}
plot_image <- function(image_path) { 
  p <- ggdraw() + draw_image(paste0(here("stimuli_sets/flowers/images/", image_path)))
  return(p)}

Random Image - Same colors, different flower

set.seed(42)
seed_image <- sample(flower_images, 1) #random starting flower
print(seed_image)
## [1] "image_02609.jpg"
random_seed_similarity <- get_similarity_df(seed_image) #get pairwise similarity measures

random_stim <- append(seed_image,
                      random_seed_similarity %>% 
                        # Select from out of category
                        filter(comp_category != target_category) %>% 
                        # slice most similar image from each category
                        group_by(comp_category) %>% 
                        #use D_hsv for color
                        slice_min(order_by = D_hsv, n = 1) %>% 
                        arrange(D_hsv) %>% ungroup() %>% 
                        #pull the most similar images
                        head(11) %>% pull(comp_image_file))

#plot
plot_grid(plotlist = lapply(random_stim, plot_image))

Example Sets - purple, yellow, red, white

Purple Flowers - Same color, diff categories

purple_seed = "image_05355.jpg"
purple_sim <- append(purple_seed, 
                     get_similarity_df(purple_seed) %>% 
                       #get out of category items
                       filter(comp_category != target_category) %>% 
                       group_by(comp_category) %>% slice_min(order_by = D_hsv, n = 1) %>%
                       arrange(D_hsv) %>% ungroup() %>% 
                       head(11) %>% pull(comp_image_file))

plot_grid(plotlist = lapply(purple_sim, plot_image))

Yellow Flowers

yellow_seed = "image_06676.jpg"
yellow_sim <- append(yellow_seed,
                     get_similarity_df(yellow_seed) %>% 
                       filter(comp_category != target_category) %>% #get out of category items
                       group_by(comp_category) %>% slice_min(order_by = D_hsv, n = 1) %>%
                       arrange(D_hsv) %>% ungroup() %>% 
                       head(11) %>% pull(comp_image_file))

plot_grid(plotlist = lapply(yellow_sim, plot_image))

Red Flowers

red_seed = "image_06787.jpg"
red_sim <- append(red_seed, 
                  get_similarity_df(red_seed) %>% 
                    filter(comp_category != target_category) %>% #get out of category items
                    group_by(comp_category) %>% slice_min(order_by = D_hsv, n = 1) %>%
                    arrange(D_hsv) %>% ungroup() %>% 
                    head(11) %>% pull(comp_image_file))

plot_grid(plotlist = lapply(red_sim, plot_image))

White FLowers

white_seed = "image_07057.jpg"
white_sim <- append(white_seed,
                    get_similarity_df(white_seed) %>% 
                      filter(comp_category != target_category) %>% #get out of category items
                      group_by(comp_category) %>% slice_min(order_by = D_hsv, n = 1) %>%
                      arrange(D_hsv) %>% ungroup() %>% 
                      head(11) %>% pull(comp_image_file))

plot_grid(plotlist = lapply(white_sim, plot_image))

Example Subsets

Examples of sets of 4 images from the 12 similar images Are these similar enough once put in groups of 4? Are they too easy to name? nb: need to remove the date or replace that one image

#cross of all 20 items, then select the first 4 items then distinct = all combinations of a subset of 4
plot_4x4 <- function(image_list){
  plot_grid(plotlist = lapply(image_list, plot_image))
}

image_combos <- arrangements::permutations(purple_sim, 4, replace = FALSE)
sampled_image_combos <- image_combos[sample(nrow(image_combos), 
                                            size = 5, replace = FALSE), ]

apply(X = sampled_image_combos, 
      MARGIN = 1,
      FUN = plot_4x4)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

Same Flower type, different categories

nb: This will give us the 11 most dissimilar flowers to the seed flower, but tells us nothing about the similarity with each of the other flowers. If the sed flower is highly atypical, we might get a set of prototypical flowers that are similar to each other

diff_seed = "image_00166.jpg"
sim_diff <- append(diff_seed,
                   get_similarity_df(diff_seed) %>% 
                     #search within category
                     filter(comp_category == target_category) %>% 
                     #most dissimilar
                     arrange(-D_hsv) %>% 
                     head(11) %>% pull(comp_image_file))

plot_grid(plotlist = lapply(sim_diff, plot_image))