This file produces some heatmaps for imputations.
First we read in some data.
library(tidyverse)
library(reshape2)
library(plyr)
library(doBy)
library(scales)
CleanData <- read.csv("F:/Google Drive/GitHub Repos/ste-phen/summer_school_sessions/5_predictions_in_python/CleanData.csv")
simdata <- subset(CleanData, DataSet == "Simulated")
pilotdata <- subset(CleanData, DataSet == "Pilot")
affectdata <- subset(CleanData, DataSet == "Affect")
CleanDataAgg <- summaryBy(Response ~
DataSet + Inducer + Concurrent + Comparison,
data= CleanData, Fun = c(mean))
Domains <- sort(unique(CleanDataAgg$Inducer))
HighValues <- c("Excited/Bored", "Happy/Sad", "Pleased/Disgusted", "Stressed/Calm",
"Loud/Quiet", "Bright/Dark", "Red/Blue", "Red/Green", "Red/Yellow",
"Yellow/Blue", "Noisy/Tonal", "High Pitch/Low Pitch", "Jagged/Curvy",
"Large/Small", "Fast/Slow")
CleanDataAgg$Inducer2 <- mapvalues(CleanDataAgg$Inducer, from = Domains, to= HighValues)
CleanDataAgg$Concurrent2 <- mapvalues(CleanDataAgg$Concurrent, from = Domains, to= HighValues)
AffectDataAgg <- subset(CleanDataAgg, DataSet == "Affect")
ggplot(data= AffectDataAgg, aes(x=Concurrent2, y=Inducer2, fill=Response.mean)) +
geom_tile(color = "white") +
ggtitle("Biases - Affect Data") +
scale_fill_gradient2(low = "red", high = "blue", mid = "white",
midpoint = 0.5, limit = c(0,1),
name="Direction and Strength of Associaton") +
theme_classic()+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
coord_fixed()
So the above heatmap is what our Collected Affect Data looks like - what we are trying to impute is the big white square in the top right - all of the comparisons that we have not collected.
We can take the imputed predictions that Bill (or you) has produced and give a heatmap of those imputations in addition to the collected data- they look like this:
ImputedPredictions <- read.csv("F:/Google Drive/GitHub Repos/ste-phen/summer_school_sessions/5_predictions_in_python/ImputedPredictions.csv")
ImputedPredictions <- subset(ImputedPredictions, select = c(Comparison, ImputedMean))
ImputedPredictions <- separate(data = ImputedPredictions, col = Comparison,
into = c("Left", "Right"),
sep = "-", remove = FALSE)
Imp1 <- ImputedPredictions
Imp2 <- ImputedPredictions
Imp1$Inducer <- Imp1$Left
Imp1$Concurrent <- Imp1$Right
Imp2$Inducer <- Imp2$Right
Imp2$Concurrent <- Imp2$Left
ImputedPredictions <- rbind(Imp1, Imp2)
ImputedPredictions <- subset(ImputedPredictions, select = c("Inducer", "Concurrent",
"Comparison", "ImputedMean"))
Domains <- sort(unique(ImputedPredictions$Inducer))
HighValues <- c("Excited/Bored", "Happy/Sad", "Pleased/Disgusted", "Stressed/Calm",
"Loud/Quiet", "Bright/Dark", "Red/Blue", "Red/Green", "Red/Yellow",
"Yellow/Blue", "Noisy/Tonal", "High Pitch/Low Pitch", "Jagged/Curvy",
"Large/Small", "Fast/Slow")
ImputedPredictions$Inducer2 <- mapvalues(ImputedPredictions$Inducer, from = Domains, to= HighValues)
ImputedPredictions$Concurrent2 <- mapvalues(ImputedPredictions$Concurrent, from = Domains, to= HighValues)
ImputedPredictions$Inducer2 <- factor(ImputedPredictions$Inducer2,
level = c("Excited/Bored", "Happy/Sad", "Pleased/Disgusted",
"Stressed/Calm", "Loud/Quiet", "Bright/Dark", "Red/Blue",
"Red/Green", "Red/Yellow", "Yellow/Blue", "Noisy/Tonal",
"High Pitch/Low Pitch", "Jagged/Curvy", "Large/Small",
"Fast/Slow"))
ImputedPredictions$Concurrent2 <- factor(ImputedPredictions$Concurrent2,
level = c("Excited/Bored", "Happy/Sad", "Pleased/Disgusted",
"Stressed/Calm", "Loud/Quiet", "Bright/Dark", "Red/Blue",
"Red/Green", "Red/Yellow", "Yellow/Blue", "Noisy/Tonal",
"High Pitch/Low Pitch", "Jagged/Curvy", "Large/Small",
"Fast/Slow"))
ggplot(data= ImputedPredictions, aes(x=Concurrent2, y=Inducer2, fill=ImputedMean)) +
geom_tile(color = "white") +
ggtitle("Biases - Imputed from Affect") +
scale_fill_gradient2(low = "red", high = "blue", mid = "white",
midpoint = 0.5, limit = c(0,1),
name="Direction and Strength of Associaton") +
theme_classic()+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
coord_fixed()
To see how well this actually predicts our data, we need to take a look at the difference between the pilot data and the imputed predictions from affect
We can do that below
PilotDataAgg <- subset(CleanDataAgg, DataSet == "Pilot")
PilotDataAgg <- arrange(PilotDataAgg, Comparison)
ggplot(data= PilotDataAgg, aes(x=Concurrent2, y=Inducer2, fill=Response.mean)) +
geom_tile(color = "white") +
ggtitle("Biases - Pilot Data") +
scale_fill_gradient2(low = "red", high = "blue", mid = "white",
midpoint = 0.5, limit = c(0,1),
name="Direction and Strength of Associaton") +
theme_classic()+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
coord_fixed()
ImputedPredictions <-arrange(ImputedPredictions, Comparison)
PilotDataAgg$Imputed <- ImputedPredictions$ImputedMean
PilotDataAgg$Difference <- abs(PilotDataAgg$Response.mean - PilotDataAgg$Imputed)
ggplot(data= PilotDataAgg, aes(x=Concurrent2, y=Inducer2, fill=Difference)) +
geom_tile(color = "white") +
ggtitle("Biases - Mismatch Between Imputed and Actual Values") +
scale_fill_gradient2(low = "white", high = "red",
name="Mismatch") +
theme_classic()+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
coord_fixed()