JooHeatmap

So that’s the dataframe read in- 1 means a strong positive correlation, .5 weak, 0 neutral, -.5 weak negative, -1 strong negative (just because I don’t have the actual data).

If I had the data, I’d probably do a proper heatmap of False Discovery rates (or their inverses), which I’ll try to give you code for below

The first thing we need to do with data of this type is take it from wide format to long format, so lets do that

library(reshape2)

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

jooDataMelt <- melt(jooData,
                     variable.name = "Class",
                     value.name = "Sig",
                    id.variable = "Meaning")

## Using Meaning as id variables

#Replace NA (empty) values with 0
jooDataMelt[is.na(jooDataMelt)] <- 0

From this data it’s pretty simple to output a heatmap

ggplot(data= jooDataMelt, aes(x=Class, y=Meaning, fill=Sig)) +
  geom_tile(color = "white") +
  # ggtitle("Associations") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", 
                       midpoint = 0, limit = c(-1,1),
                       name="Direction and Strength of Associaton") +
  #geom_text(aes(label = round(Response, 2)), size = 3) +
  theme_classic()+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
  coord_fixed()

ggsave("JooHeatmap1.png", plot = last_plot(), device = NULL, path = NULL,
  width = 8, height = 12, units = c("in", "cm", "mm"),
  dpi = 600)

Okay so that maybe doesn’t look super informative- lets get rid of some of the empty rows (which you also don’t present in your tables)

#Count the number of NAs for each row
jooData$NAs <- rowSums(is.na(jooData))

#Exclude rows that have 13 NA values (no significant correlations)
jooDataSub <- subset(jooData, NAs!= 13)

#remove the count column
jooDataSub <- subset(jooDataSub, select = -c(NAs))

#Put the trimmed data into longform
jooDataSubMelt <- melt(jooDataSub,
                     variable.name = "Class",
                     value.name = "Sig",
                    id.variable = "Meaning")

## Using Meaning as id variables

#Replace NA (empty) values with 0
jooDataSubMelt[is.na(jooDataSubMelt)] <- 0

#output heatmap
ggplot(data= jooDataSubMelt, aes(x=Class, y=Meaning, fill=Sig)) +
  geom_tile(color = "white") +
  # ggtitle("Associations") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", 
                       midpoint = 0, limit = c(-1,1),
                       name="Direction and Strength of Associaton") +
  #geom_text(aes(label = round(Response, 2)), size = 3) +
  theme_classic()+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
  coord_fixed()

ggsave("JooHeatmap2.png", plot = last_plot(), device = NULL, path = NULL,
  width = 8, height = 8, units = c("in", "cm", "mm"),
  dpi = 600)

Okay so again maybe not the most excellent- what you rreally want is a plot of your actual False Discovery rates (or whatever dependent variable you desire), maybe with some marker of what level of significance they reached

So i’ll simulate some data here with the following assumptions:

For highly significant things, i’ll randomly choose a FDR between 0 and 0.05 For less significant things, i’ll randomly choose a FDR between 0.05 and 0.1 For non-significant things, i’ll randomly choose a FDR between .1 and 1

I’ll sample all of these from normal distributions so they aren’t too wonky (this shouldn’t matter- you should be able to use the actual values here)

#split up the data for different operations
jooDataNS <- subset(jooDataSubMelt, Sig == 0)
jooDataWeak <- subset(jooDataSubMelt, abs(Sig) == 0.5)
jooDataStrong <- subset(jooDataSubMelt, abs(Sig) == 1)

#generate FDR values

#for NS values generate FDR 
jooDataNS$FDR <- rnorm(nrow(jooDataNS), mean = 0.5, sd = 0.2)
#transform values over 1 and below 0.1
jooDataNS$FDR <- ifelse(jooDataNS$FDR < 0.1, 0.1, jooDataNS$FDR)
jooDataNS$FDR <- ifelse(jooDataNS$FDR > 1, .99, jooDataNS$FDR)

#for Weak values generate FDR
jooDataWeak$FDR <- rnorm(nrow(jooDataWeak), mean = 0.075, sd= 0.01)
#transform values over 0.1 and under 0.05
jooDataWeak$FDR <- ifelse(jooDataWeak$FDR < 0.05, 0.051, jooDataWeak$FDR)
jooDataWeak$FDR <- ifelse(jooDataWeak$FDR > .1, .11, jooDataWeak$FDR)

#for Strong values generate FDR
jooDataStrong$FDR <- rnorm(nrow(jooDataStrong), mean = 0.025, sd= 0.01)
#transform values over 0.05 and under 0
jooDataStrong$FDR <- ifelse(jooDataStrong$FDR < 0, 0.01, jooDataStrong$FDR)
jooDataStrong$FDR <- ifelse(jooDataStrong$FDR > .05, .049, jooDataStrong$FDR)


#Recombine the datasets
jooDataFDR <- rbind(jooDataNS, jooDataWeak, jooDataStrong)

If we heatmap this data it wont’ look great- generally for a heatmap you want larger values to be brighter, so we’ll take the inverse of the false discovery rate (this suggests that you should actually plot whatever value you put into your Benjamini Hochberg- your z test values I think) and heatmap that out

jooDataFDR$invFDR <- 1- jooDataFDR$FDR

ggplot(data= jooDataFDR, aes(x=Class, y=Meaning, fill=invFDR)) +
  geom_tile(color = "white") +
  # ggtitle("Associations") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", 
                       midpoint = 0, limit = c(-1,1),
                       name="Direction and Strength of Associaton") +
  #geom_text(aes(label = round(Response, 2)), size = 3) +
  theme_classic()+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
  coord_fixed()

ggsave("JooHeatmap3.png", plot = last_plot(), device = NULL, path = NULL,
  width = 8, height = 8, units = c("in", "cm", "mm"),
  dpi = 600)

So that gives us a heatmap, but not an exceptionally informative one because we’ve lost the directionality (positive vs. negative associations) - lets put that back in then pretty things up a bit

library(plyr)

## -------------------------------------------------------------------------

## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)

## -------------------------------------------------------------------------

## 
## Attaching package: 'plyr'

## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following object is masked from 'package:purrr':
## 
##     compact

jooDataFDR$invFDR2 <- ifelse(jooDataFDR$Sig < 0, jooDataFDR$invFDR * -1, jooDataFDR$invFDR)

jooDataFDR$FeatureType <- mapvalues(jooDataFDR$Class, 
                                    from = c('Labial', "Coronal", "Dorsal", "Stop", "Fricative", "Nasal", "Approximant", 
                                    "High", "Low", "Front", "Back", "Rounded", "Unrounded"),
                                    to = c(rep("Consonantal", 7), rep("Vowel",6)))

jooDataFDR$Sig2 <- mapvalues(jooDataFDR$Sig,
                             from = c(-1, -0.5, 0, 0.5, 1),
                             to = c("**", "*", "", "*", "**"))



ggplot(data= jooDataFDR, aes(x=Class, y=Meaning, fill=invFDR2)) +
  geom_tile(color = "white") +
  # ggtitle("Associations") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", 
                       midpoint = 0, limit = c(-1,1),
                       name="Direction and Strength of Associaton") +
  geom_text(aes(label = Sig2), size = 3) +
  theme_classic()+ 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))+
  coord_fixed() +
  facet_grid(~ FeatureType, scales = "free", space = "free", switch = 'x')

ggsave("JooHeatmap4.png", plot = last_plot(), device = NULL, path = NULL,
  width = 8, height = 8, units = c("in", "cm", "mm"),
  dpi = 600)

JooHeatmap

Alan Nielsen

February 20, 2018