# Demo: Mosaic plot using data on word origin in N|uu, a South African Khoisan language

# Author: Markus — Jun 23, 2013, 12:14 AM

# Demo
# Mosaic plot using data on word origin in N|uu, a South African Khoisan language

# Reads in and cleans up data from a text file exported from 'A Linguist's Toolbox'.
#
# Args:
#   path: path to the text file, as in scan_clean("dataset.txt").
#
# Returns:
#   A character vector with one entry per whitespace-separated token of the
#   file that contains at least one upper-case letter A-Z. Every character
#   that is not an upper-case letter A-Z is stripped from each token, and
#   tokens that end up empty are dropped.
scan_clean <- function(path) {
  tokens <- scan(
    file = path,                 # read in data set from path
    what = "character",          # read every whitespace-separated field as text
    quote = "",                  # treat ' and " as plain characters; with the default
                                 # quote set, a token starting with a quote (e.g. 'n)
                                 # would swallow all following tokens up to the next quote
    na.strings = character(0),   # keep a literal "NA" code instead of turning it into NA
    quiet = TRUE,                # do not print number of read items into console
    blank.lines.skip = TRUE      # skip blank lines in data file
  )
  # Remove all characters (= replace with "") that are not (^) upper-case
  # letters (A-Z). Also see ?regex for regular expressions, character classes
  # and metacharacters.
  codes <- gsub(pattern = "[^A-Z]", replacement = "", x = tokens)
  codes[nzchar(codes)]  # drop entries that became empty
}

# Creates a contingency table (speaker x word origin) from two source files.
#
# Args:
#   source_1, source_2: paths of the two data files, each read via scan_clean().
#   name_1, name_2:     labels attached to every entry read from source_1 and
#                       source_2 respectively.
#
# Returns:
#   The result of table() on a data frame with the columns speaker_name and
#   word_origin, i.e. counts of each word origin per speaker label.
cool_table <- function(source_1, source_2, name_1, name_2) {
  origins_1 <- scan_clean(source_1)
  origins_2 <- scan_clean(source_2)

  # One speaker label per entry: rep("a", 2) returns "a" "a".
  labels_1 <- rep(name_1, length(origins_1))
  labels_2 <- rep(name_2, length(origins_2))

  observations <- data.frame(
    speaker_name = c(labels_1, labels_2),
    word_origin = c(origins_1, origins_2)
  )
  table(observations)
}

# Build the speaker-by-word-origin contingency table from the two word lists.
data_table <- cool_table("wordlist_so_2G.txt", "wordlist_so_2C.txt", "G", "C")

# Set color_count to the number of COLUMNS in data_table (one per word origin).
# ncol(x) is the second element of dim(x), where x is some sort of matrix.
# Previous solution (works just as fine):
# color_count <- length(levels(factor(colnames(data_table))))
color_count <- ncol(data_table)

# RColorBrewer is an optional dependency: requireNamespace() returns TRUE if
# the package can be loaded and FALSE otherwise, without attaching it.
# If the package is not installed yet, try install.packages("RColorBrewer").
# (The rendered version of this demo showed the console message
# "Loading required package: RColorBrewer" at this point; that line was
# console output, not code, and must not be part of the script.)
package_available <- requireNamespace("RColorBrewer", quietly = TRUE)

plot_color <- c("red2", "gold", "darkgreen", "deepskyblue", "blueviolet") # fallback colors
if (package_available) {
  # brewer.pal() creates a custom color palette. The "BuGn" palette is only
  # defined for 3 to 9 colors, so the requested size is clamped to that range
  # (avoiding a warning); for more than 9 word origins the 9 palette colors
  # are interpolated to the required number.
  palette_size <- min(max(color_count, 3L), 9L)
  plot_color <- RColorBrewer::brewer.pal(palette_size, "BuGn")
  if (color_count > palette_size) {
    plot_color <- grDevices::colorRampPalette(plot_color)(color_count)
  }
  # If the package is not available, the fallback colors above are used.
}

# plot() on a two-way table draws a mosaic plot; call mosaicplot() directly so
# the colors are passed through its documented `color` argument.
mosaicplot(data_table,
           color = plot_color,
           main = "Nluu   -   Word origin ratio per speaker",
           xlab = "Speakers' initials",
           ylab = "Languages of origin")

# (Figure output: plot of chunk unnamed-chunk-1)