Heatmaps for Prediction and Imputation

This file produces some heatmaps for imputations.

First we read in some data.

library(tidyverse)
library(reshape2)
library(plyr)
library(doBy)
library(scales)


CleanData <- read.csv("F:/Google Drive/GitHub Repos/ste-phen/summer_school_sessions/5_predictions_in_python/CleanData.csv")
simdata <- subset(CleanData, DataSet == "Simulated")
pilotdata <- subset(CleanData, DataSet == "Pilot")
affectdata <- subset(CleanData, DataSet == "Affect")

CleanDataAgg <- summaryBy(Response ~ 
                       DataSet + Inducer + Concurrent + Comparison,
                     data= CleanData, Fun = c(mean))

Domains <- sort(unique(CleanDataAgg$Inducer))
HighValues <- c("Excited/Bored", "Happy/Sad", "Pleased/Disgusted", "Stressed/Calm",
                "Loud/Quiet", "Bright/Dark", "Red/Blue", "Red/Green", "Red/Yellow",
                "Yellow/Blue", "Noisy/Tonal", "High Pitch/Low Pitch", "Jagged/Curvy", 
                "Large/Small", "Fast/Slow")

CleanDataAgg$Inducer2 <- mapvalues(CleanDataAgg$Inducer, from = Domains, to= HighValues)
CleanDataAgg$Concurrent2 <- mapvalues(CleanDataAgg$Concurrent, from = Domains, to= HighValues)

AffectDataAgg <- subset(CleanDataAgg, DataSet == "Affect")

ggplot(data= AffectDataAgg, aes(x=Concurrent2, y=Inducer2, fill=Response.mean)) +
  geom_tile(color = "white") +
  ggtitle("Biases - Affect Data") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", 
                       midpoint = 0.5, limit = c(0,1),
                       name="Direction and Strength of Associaton") +
  theme_classic()+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
  coord_fixed()

So the above heatmap is what our Collected Affect Data looks like - what we are trying to impute is the big white square in the top right - all of the comparisons that we have not collected.

We can take the imputed predictions that Bill (or you) has produced and give a heatmap of those imputations in addition to the collected data- they look like this:

ImputedPredictions <- read.csv("F:/Google Drive/GitHub Repos/ste-phen/summer_school_sessions/5_predictions_in_python/ImputedPredictions.csv")

ImputedPredictions <- subset(ImputedPredictions, select = c(Comparison, ImputedMean))

ImputedPredictions <- separate(data = ImputedPredictions, col = Comparison,
                      into = c("Left", "Right"), 
                      sep = "-", remove = FALSE)

Imp1 <- ImputedPredictions
Imp2 <- ImputedPredictions

Imp1$Inducer <- Imp1$Left
Imp1$Concurrent <- Imp1$Right

Imp2$Inducer <- Imp2$Right
Imp2$Concurrent <- Imp2$Left

ImputedPredictions <- rbind(Imp1, Imp2)

ImputedPredictions <- subset(ImputedPredictions, select = c("Inducer", "Concurrent", 
                                                            "Comparison", "ImputedMean"))

Domains <- sort(unique(ImputedPredictions$Inducer))
HighValues <- c("Excited/Bored", "Happy/Sad", "Pleased/Disgusted", "Stressed/Calm",
                "Loud/Quiet", "Bright/Dark", "Red/Blue", "Red/Green", "Red/Yellow",
                "Yellow/Blue", "Noisy/Tonal", "High Pitch/Low Pitch", "Jagged/Curvy", 
                "Large/Small", "Fast/Slow")

ImputedPredictions$Inducer2 <- mapvalues(ImputedPredictions$Inducer, from = Domains, to= HighValues)
ImputedPredictions$Concurrent2 <- mapvalues(ImputedPredictions$Concurrent, from = Domains, to= HighValues)

ImputedPredictions$Inducer2 <- factor(ImputedPredictions$Inducer2, 
                                  level = c("Excited/Bored", "Happy/Sad", "Pleased/Disgusted",
                                            "Stressed/Calm", "Loud/Quiet", "Bright/Dark", "Red/Blue",
                                            "Red/Green", "Red/Yellow", "Yellow/Blue", "Noisy/Tonal",
                                            "High Pitch/Low Pitch", "Jagged/Curvy", "Large/Small",
                                            "Fast/Slow"))

ImputedPredictions$Concurrent2 <- factor(ImputedPredictions$Concurrent2, 
                                  level = c("Excited/Bored", "Happy/Sad", "Pleased/Disgusted",
                                            "Stressed/Calm", "Loud/Quiet", "Bright/Dark", "Red/Blue",
                                            "Red/Green", "Red/Yellow", "Yellow/Blue", "Noisy/Tonal",
                                            "High Pitch/Low Pitch", "Jagged/Curvy", "Large/Small",
                                            "Fast/Slow"))


ggplot(data= ImputedPredictions, aes(x=Concurrent2, y=Inducer2, fill=ImputedMean)) +
  geom_tile(color = "white") +
  ggtitle("Biases - Imputed from Affect") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", 
                       midpoint = 0.5, limit = c(0,1),
                       name="Direction and Strength of Associaton") +
  theme_classic()+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
  coord_fixed()

To see how well this actually predicts our data, we need to take a look at the difference between the pilot data and the imputed predictions from affect

We can do that below

PilotDataAgg <- subset(CleanDataAgg, DataSet == "Pilot")
PilotDataAgg <- arrange(PilotDataAgg, Comparison)

ggplot(data= PilotDataAgg, aes(x=Concurrent2, y=Inducer2, fill=Response.mean)) +
  geom_tile(color = "white") +
  ggtitle("Biases - Pilot Data") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", 
                       midpoint = 0.5, limit = c(0,1),
                       name="Direction and Strength of Associaton") +
  theme_classic()+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
  coord_fixed()

ImputedPredictions <-arrange(ImputedPredictions, Comparison)

PilotDataAgg$Imputed <- ImputedPredictions$ImputedMean

PilotDataAgg$Difference <- abs(PilotDataAgg$Response.mean - PilotDataAgg$Imputed)

ggplot(data= PilotDataAgg, aes(x=Concurrent2, y=Inducer2, fill=Difference)) +
  geom_tile(color = "white") +
  ggtitle("Biases - Mismatch Between Imputed and Actual Values") +
  scale_fill_gradient2(low = "white", high = "red", 
                       name="Mismatch") +
  theme_classic()+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
  coord_fixed()

Heatmaps for Prediction and Imputation

Alan Nielsen

September 20, 2017