#Load packages
library(tidyverse)
library(harrietr)
library(ggdendro)
library(ggimage)
library(dendroextras)

####function for computing distance matrix
#Compute the pairwise distances between the image placements of one grid.
#
#current_d:      data frame (or tibble) with columns image, posX and posY
#normalize_dist: if TRUE, divide every distance by the largest distance so
#                that the values range from 0 to 1
#
#Returns a dist object whose labels are the image names.
get_distance <- function(current_d, normalize_dist = TRUE) {

  #build the coordinate matrix and label it directly; setting row names on the
  #data frame itself is deprecated when the input is a tibble (e.g. from nest())
  coords <- as.matrix(current_d[, c("posX", "posY")])
  rownames(coords) <- as.character(current_d[["image"]])

  #diag/upper only affect how the dist object is printed
  d_dist <- dist(coords, diag = TRUE, upper = TRUE)

  if (normalize_dist && length(d_dist) > 0) {
    max_dist <- max(d_dist)
    #when every point coincides the maximum is 0; skip the division so the
    #result stays 0 instead of becoming NaN (NA still propagates as before)
    if (is.na(max_dist) || max_dist != 0) {
      d_dist <- d_dist / max_dist
    }
  }

  d_dist
}

###convert long data to dist object ###
#Convert a long table of pairwise distances into a dist object.
#
#d_long:    data frame with one row per item pair
#colnames:  names of the two columns holding the items of each pair
#dist_name: name of the column holding the distance of the pair
#
#Returns a dist object labelled by the sorted item names. Self-distances are
#set to 0; pairs that do not occur in d_long are NA.
long_to_dist <- function(d_long, colnames=c("item1","item2"),dist_name="avg_dist") {
  first_item <- as.character(d_long[[colnames[1]]])
  second_item <- as.character(d_long[[colnames[2]]])
  pair_dist <- d_long[[dist_name]]

  #each ordered pair may appear only once, otherwise the distance is ambiguous
  if (anyDuplicated(data.frame(first_item, second_item)) > 0) {
    stop("each (", colnames[1], ", ", colnames[2],
         ") pair must appear only once", call. = FALSE)
  }

  #square matrix over every item that occurs in either column
  items <- sort(unique(c(first_item, second_item)))
  dist_mat <- matrix(NA_real_,
                     nrow = length(items),
                     ncol = length(items),
                     dimnames = list(items, items))

  #fill both triangles so the result does not depend on which item of a pair
  #is listed first; the [second, first] assignment runs last so an explicitly
  #supplied value for that cell takes precedence over a mirrored one
  dist_mat[cbind(first_item, second_item)] <- pair_dist
  dist_mat[cbind(second_item, first_item)] <- pair_dist

  #an item is at distance zero from itself
  diag(dist_mat) <- 0

  as.dist(dist_mat)
}

# Single subject analysis - Example

## Practice

##load the practice-phase grid placements
d <- read.csv(file = "../Grid_data/s101_EmotionGrid_v4_Practice.csv")

#label every row with its image name so the dendrogram leaves are readable
row.names(d) <- d$image

#pairwise distances between the image positions (dist() default metric)
d_dist <- dist(as.matrix(d[c("posX", "posY")]))

#hierarchical clustering on those distances
d_hclust <- hclust(d_dist)

#draw the dendrogram
plot(d_hclust)

## Sort 1

##load the Sort 1 grid placements
d <- read.csv(file = "../Grid_data/s101_EmotionGrid_v4_Sort1.csv")

#label every row with its image name so the dendrogram leaves are readable
row.names(d) <- d$image

#pairwise distances between the image positions (dist() default metric)
d_dist <- dist(as.matrix(d[c("posX", "posY")]))

#hierarchical clustering on those distances
d_hclust <- hclust(d_dist)

#draw the dendrogram
plot(d_hclust)

## Sort 2

##load the Sort 2 grid placements
d <- read.csv(file = "../Grid_data/s101_EmotionGrid_v4_Sort2.csv")

#label every row with its image name so the dendrogram leaves are readable
row.names(d) <- d$image

#pairwise distances between the image positions (dist() default metric)
d_dist <- dist(as.matrix(d[c("posX", "posY")]))

#hierarchical clustering on those distances
d_hclust <- hclust(d_dist)

#draw the dendrogram
plot(d_hclust)

# Average across subjects

d <- read.csv(file = "../Grid_data/processed_data/GridTask_allData.csv")

#Plan for the group-level analysis:
# -scale each child's distance matrix to 0-1 by dividing by that child's
#  maximum distance
# -average the scaled values across all children
#This is done separately for each sort.

#one row per participant x sort, holding the distances in three forms:
#dist object, full matrix, and long (pairwise) data frame
subj_dist_byGroup <- d %>%
  group_by(participant, sort) %>%
  nest() %>%
  mutate(
    dist_object = map(data, get_distance),
    dist_matrix = map(dist_object, as.matrix),
    dist_long = map(dist_matrix, melt_dist)
  ) %>%
  select(-data)

#create long dataframe with pairwise distances (normalized)
subj_dist_long <- subj_dist_byGroup %>%
  select(-dist_object, -dist_matrix) %>%
  #name the list-column explicitly; a bare unnest() is deprecated
  unnest(dist_long) %>%
  #order the two items of each pair alphabetically so that A/B and B/A share
  #one key; the columns are computed directly instead of pasting with "-" and
  #splitting again, which would break image names that contain "-"
  mutate(item1 = pmin(iso1, iso2),
         item2 = pmax(iso1, iso2)) %>%
  select(-iso1, -iso2)

#average each pairwise distance across participants, separately for each sort
avg_dist_long <- subj_dist_long %>%
  group_by(sort, item1, item2) %>%
  summarize(avg_dist = mean(dist)) %>%
  #drop the leftover grouping so later steps start from a plain table
  ungroup()

#one row per sort, each holding the averaged distances as a dist object
avg_dist <- avg_dist_long %>%
  group_by(sort) %>%
  nest() %>%
  ungroup() %>%
  mutate(dist_obj = map(data, long_to_dist))

## Average Practice Cluster

#hierarchical clustering of the averaged Practice distances
cur_dist <- filter(avg_dist, sort == "Practice")[["dist_obj"]][[1]]
cur_cluster <- hclust(cur_dist)

#quick base-graphics dendrogram
plot(cur_cluster)

#image file paths in dendrogram order (labels() method from dendroextras)
image_paths <- paste0("../stimuli_practice/", labels(cur_cluster), ".png")

#one row per leaf: x is the leaf position, y places the image just below zero
cur_images <- data.frame(
  label = labels(cur_cluster),
  image = image_paths,
  x = seq_along(image_paths),
  y = rep(-0.05, times = length(image_paths))
)

#dendrogram with the stimulus images drawn at the leaves
ggdendrogram(cur_cluster, rotate = TRUE) +
  geom_image(
    data = cur_images,
    mapping = aes(x = x, y = y, image = image),
    size = 0.05
  )

### Plot Multidimensional Scaling of Average Distances

#classical multidimensional scaling of the averaged distances (columns X1, X2)
cur_cmd <- data.frame(cmdscale(cur_dist))
cur_cmd[["label"]] <- rownames(cur_cmd)
#attach the image paths; label is the only column the two tables share
cur_cmd <- merge(cur_cmd, cur_images, by = "label")

#scatter plot with the label below and the image to the right of each point
ggplot(
  data = cur_cmd,
  mapping = aes(x = X1, y = X2, label = label, image = image)
) +
  geom_point(size = 2, shape = 21, stroke = 2) +
  geom_text(mapping = aes(y = X2 - 0.03)) +
  geom_image(mapping = aes(x = X1 + 0.05), size = 0.08)

## Average Sort1 Cluster

#store current hierarchical cluster
cur_dist <- filter(avg_dist, sort == "Sort1")$dist_obj[[1]]
cur_cluster <- hclust(cur_dist)

#simple cluster plot
plot(cur_cluster)

#create image paths in dendrogram order (with dendroextras package)
image_paths <- paste0("../stimuli_sort1/", labels(cur_cluster), ".png")

#add images
#y alternates between two offsets, presumably so neighbouring images do not
#overlap; length.out keeps the vector the same length as image_paths even for
#an odd number of images (rep(..., n/2) would come up one short and make
#data.frame() fail)
cur_images <- data.frame(
  label = labels(cur_cluster),
  image = image_paths,
  x = seq_along(image_paths),
  y = rep(c(-0.05, -0.1), length.out = length(image_paths)))

#plot dendrogram
ggdendrogram(cur_cluster, rotate = TRUE) +
  geom_image(data = cur_images, aes(x = x, y = y, image = image), size = 0.05)

### Plot Multidimensional Scaling of Average Distances

#classical multidimensional scaling of the averaged distances (columns X1, X2)
cur_cmd <- data.frame(cmdscale(cur_dist))
cur_cmd[["label"]] <- rownames(cur_cmd)
#attach the image paths; label is the only column the two tables share
cur_cmd <- merge(cur_cmd, cur_images, by = "label")

#scatter plot with the label below and the image to the right of each point
ggplot(
  data = cur_cmd,
  mapping = aes(x = X1, y = X2, label = label, image = image)
) +
  geom_point(size = 2, shape = 21, stroke = 2) +
  geom_text(mapping = aes(y = X2 - 0.03)) +
  geom_image(mapping = aes(x = X1 + 0.05), size = 0.08)

## Average Sort2 Cluster

#store current hierarchical cluster
cur_dist <- filter(avg_dist, sort == "Sort2")$dist_obj[[1]]
cur_cluster <- hclust(cur_dist)

#simple cluster plot
plot(cur_cluster)

#create image paths in dendrogram order (with dendroextras package)
image_paths <- paste0("../stimuli_sort2/", labels(cur_cluster), ".png")

#add images
#y alternates between two offsets, presumably so neighbouring images do not
#overlap; length.out keeps the vector the same length as image_paths even for
#an odd number of images (rep(..., n/2) would come up one short and make
#data.frame() fail)
cur_images <- data.frame(
  label = labels(cur_cluster),
  image = image_paths,
  x = seq_along(image_paths),
  y = rep(c(-0.05, -0.1), length.out = length(image_paths)))

#plot dendrogram
ggdendrogram(cur_cluster, rotate = TRUE) +
  geom_image(data = cur_images, aes(x = x, y = y, image = image), size = 0.05)

### Plot Multidimensional Scaling of Average Distances

#classical multidimensional scaling of the averaged distances (columns X1, X2)
cur_cmd <- data.frame(cmdscale(cur_dist))
cur_cmd[["label"]] <- rownames(cur_cmd)
#attach the image paths; label is the only column the two tables share
cur_cmd <- merge(cur_cmd, cur_images, by = "label")

#scatter plot with the label below and the image to the right of each point
ggplot(
  data = cur_cmd,
  mapping = aes(x = X1, y = X2, label = label, image = image)
) +
  geom_point(size = 2, shape = 21, stroke = 2) +
  geom_text(mapping = aes(y = X2 - 0.03)) +
  geom_image(mapping = aes(x = X1 + 0.05), size = 0.08)