I love barplots! Here’s a simple function that allows me to do some quick 2-way exploration of my data when I have categorical variables.

Let’s give ourselves some play data:

# Levels each categorical variable is drawn from.
gender = c("Male", "Female")
age_band = c("[16-24]", "[25-49]", "50+")
buying_ratio = c("1.Low", "2.Medium", "3.High")

# Number of simulated rows; every column below is sized from it, so changing
# this one value resizes the whole play data set.
n = 100

# Draw n values per variable, with replacement (sample() picks each level
# with equal probability when no `prob` is given).
gender_v = sample(gender, n, replace = TRUE)
age_band_v = sample(age_band, n, replace = TRUE)
# Note: this overwrites the vector of levels defined above with the sampled
# column of length n.
buying_ratio = sample(buying_ratio, n, replace = TRUE)

df = data.frame(gender = gender_v, age_band = age_band_v, buying_ratio = buying_ratio)

# Print the simulated data.
df
##     gender age_band buying_ratio
## 1   Female  [16-24]       3.High
## 2     Male      50+        1.Low
## 3   Female  [25-49]        1.Low
## 4     Male  [25-49]        1.Low
## 5     Male  [16-24]        1.Low
## 6     Male  [16-24]        1.Low
## 7     Male      50+        1.Low
## 8     Male  [25-49]     2.Medium
## 9     Male  [16-24]     2.Medium
## 10  Female  [25-49]       3.High
## 11    Male      50+        1.Low
## 12    Male  [25-49]     2.Medium
## 13  Female  [25-49]        1.Low
## 14  Female  [25-49]        1.Low
## 15    Male  [25-49]        1.Low
## 16  Female  [25-49]       3.High
## 17    Male  [16-24]     2.Medium
## 18    Male      50+       3.High
## 19    Male  [25-49]     2.Medium
## 20  Female  [16-24]       3.High
## 21  Female  [16-24]        1.Low
## 22  Female  [25-49]     2.Medium
## 23  Female  [16-24]       3.High
## 24  Female      50+       3.High
## 25  Female  [25-49]     2.Medium
## 26    Male  [25-49]       3.High
## 27    Male  [16-24]        1.Low
## 28    Male      50+        1.Low
## 29  Female      50+        1.Low
## 30    Male  [16-24]        1.Low
## 31    Male  [25-49]     2.Medium
## 32  Female  [25-49]       3.High
## 33  Female      50+     2.Medium
## 34  Female  [25-49]        1.Low
## 35    Male  [25-49]        1.Low
## 36    Male  [16-24]     2.Medium
## 37  Female  [25-49]       3.High
## 38  Female  [16-24]        1.Low
## 39    Male  [25-49]        1.Low
## 40    Male  [25-49]     2.Medium
## 41    Male  [25-49]     2.Medium
## 42    Male  [25-49]        1.Low
## 43    Male      50+       3.High
## 44    Male      50+       3.High
## 45    Male      50+       3.High
## 46    Male      50+        1.Low
## 47  Female  [16-24]        1.Low
## 48    Male  [16-24]       3.High
## 49    Male      50+     2.Medium
## 50  Female  [25-49]       3.High
## 51    Male      50+     2.Medium
## 52  Female  [16-24]       3.High
## 53  Female      50+       3.High
## 54    Male  [25-49]       3.High
## 55    Male  [25-49]     2.Medium
## 56    Male      50+        1.Low
## 57    Male  [16-24]       3.High
## 58    Male      50+     2.Medium
## 59  Female  [25-49]     2.Medium
## 60  Female      50+     2.Medium
## 61    Male  [25-49]       3.High
## 62  Female  [25-49]        1.Low
## 63  Female  [25-49]     2.Medium
## 64    Male  [25-49]       3.High
## 65    Male  [16-24]       3.High
## 66    Male      50+     2.Medium
## 67    Male  [25-49]       3.High
## 68    Male      50+       3.High
## 69  Female  [25-49]       3.High
## 70    Male  [25-49]     2.Medium
## 71  Female  [16-24]       3.High
## 72    Male  [16-24]       3.High
## 73  Female      50+        1.Low
## 74    Male  [25-49]       3.High
## 75  Female  [16-24]     2.Medium
## 76  Female      50+     2.Medium
## 77    Male  [25-49]        1.Low
## 78  Female  [25-49]     2.Medium
## 79    Male  [25-49]       3.High
## 80  Female  [25-49]     2.Medium
## 81    Male      50+        1.Low
## 82  Female      50+        1.Low
## 83  Female  [16-24]     2.Medium
## 84  Female  [25-49]       3.High
## 85    Male  [16-24]        1.Low
## 86    Male      50+     2.Medium
## 87  Female  [16-24]        1.Low
## 88    Male  [16-24]        1.Low
## 89  Female  [16-24]     2.Medium
## 90  Female      50+       3.High
## 91    Male  [25-49]       3.High
## 92  Female  [25-49]        1.Low
## 93    Male  [25-49]       3.High
## 94  Female  [16-24]       3.High
## 95  Female  [25-49]       3.High
## 96  Female      50+       3.High
## 97  Female  [16-24]        1.Low
## 98    Male  [25-49]        1.Low
## 99    Male      50+     2.Medium
## 100 Female  [16-24]     2.Medium

and a function to visualize conditional distributions:

# Draw a stacked barplot of `varname2` within each level of `varname`.
#
# One bar is drawn per level of `varname`. Its width is proportional to the
# number of rows in that level, and it is split into one segment per level of
# `varname2`. Every segment is labelled with its share of the bar.
#
# Arguments:
#   varname   Name of the column that defines the bars.
#   varname2  Name of the column that defines the segments inside each bar.
#   col       Segment colours, handed to barplot(); one per level of
#             `varname2`, in alphabetical order of the level names.
#   data      Data frame holding both columns. Defaults to the `df` object
#             visible from where the function is defined, which is the
#             object the function read before this argument existed.
#
# Returns, invisibly, the bar midpoints reported by barplot().
plot_var = function(varname, varname2, col, data = df){
  stopifnot(is.data.frame(data),
            nrow(data) > 0,
            varname %in% names(data),
            varname2 %in% names(data))

  # Rows are the levels of varname2 (segments), columns the levels of
  # varname (bars). unclass() leaves a plain count matrix.
  counts = unclass(t(table(data[[varname]], data[[varname2]])))
  # drop = FALSE keeps a matrix even when varname2 has a single level.
  counts = counts[order(rownames(counts)), , drop = FALSE]

  # Top edge of every segment; matrix() restores the shape that apply()
  # collapses when there is only one row.
  tops = matrix(apply(counts, 2, cumsum), nrow = nrow(counts))
  # Vertical centre of every segment, where its label is drawn.
  ypos = tops - counts / 2

  # Share of each segment within its bar. paste() flattens the matrix column
  # by column, i.e. bar by bar, which is the order text() is given below.
  percentages = paste(round(prop.table(counts, margin = 2), 2) * 100, "%")

  bar_widths = colSums(counts)

  # The legend is taken from the same reordered matrix as the bars so that
  # colours and labels always agree.
  bp = barplot(counts, main = paste(varname2, "by", varname),
               names.arg = paste(colnames(counts), "(", bar_widths, ")"),
               beside = FALSE, col = col,
               legend.text = rownames(counts),
               args.legend = list(x = "topleft", cex = 0.6, inset = c(0, -0.05)),
               width = bar_widths)

  # One label per segment: each bar midpoint is repeated for all its segments.
  text(rep(bp, each = nrow(counts)), as.vector(ypos), percentages)

  invisible(bp)
}

We can call the function like so:

# RColorBrewer supplies brewer.pal(), used here to build the segment colours.
library(RColorBrewer)
# One bar per gender, split into buying_ratio segments; three shades from the
# "Oranges" palette, one per buying_ratio level.
plot_var("gender", "buying_ratio", brewer.pal(3, "Oranges"))

And the other way around, with one bar per buying-ratio level, split by gender:

# One bar per buying_ratio level, split into gender segments; two colours,
# one per gender.
plot_var("buying_ratio", "gender", c("indianred1", "lightblue2"))