# Markus — Jun 23, 2013, 12:14 AM
# Demo
# Mosaic plot using data on word origin in N|uu, a South African Khoisan language
# Reads in and cleans up data from a text file exported from
# 'A Linguist's Toolbox'.
#
# Arguments:
#   path  file to read, as in scan_clean("dataset.txt")
#
# Returns a character vector with one entry per whitespace-separated token of
# the file, reduced to its upper-case letters (A-Z); tokens that contain no
# upper-case letter are dropped. An empty file yields character(0).
scan_clean <- function(path) {
  tokens <- scan(
    file = path,
    what = "character",          # read every token as a string
    quote = "",                  # treat ' and " as ordinary characters, so a
                                 # stray quote cannot glue several tokens
                                 # into a single entry
    na.strings = character(0),   # keep a literal "NA" code as text instead
                                 # of turning it into a missing value
    quiet = TRUE,                # do not print the number of items read
    blank.lines.skip = TRUE      # skip blank lines in the data file
  )
  # Remove every character that is not (^) an upper-case letter (A-Z);
  # see ?regex for regular expressions and character classes.
  tokens <- gsub(pattern = "[^A-Z]", replacement = "", x = tokens)
  # Drop the entries that ended up empty.
  tokens[tokens != ""]
}
# Creates a speaker-by-origin data table from two source files.
#
# Arguments:
#   source_1, source_2  paths of the two files; each is passed to scan_clean()
#   name_1, name_2      labels attached to the entries of source_1 / source_2
#
# Returns the table() of a two-column data frame (speaker_name, word_origin),
# i.e. the count of every cleaned entry per speaker label.
cool_table <- function(source_1, source_2, name_1, name_2) {
  origins_1 <- scan_clean(source_1)
  origins_2 <- scan_clean(source_2)

  # Tag each entry with the label of the file it came from:
  # rep("a", times = 2) returns "a" "a".
  labels <- c(
    rep(name_1, times = length(origins_1)),
    rep(name_2, times = length(origins_2))
  )

  tallied <- data.frame(
    speaker_name = labels,
    word_origin = c(origins_1, origins_2)
  )
  table(tallied)
}
# Tabulate the word origins of speakers "G" and "C" from their word lists.
data_table <- cool_table("wordlist_so_2G.txt", "wordlist_so_2C.txt", "G", "C")

# One colour is needed per column of data_table.
# ncol(x) is dim(x)[2], the second dimension of a matrix-like object.
color_count <- ncol(data_table)
# previous solution (works just as fine):
# color_count <- length(levels(factor(colnames(data_table))))

# RColorBrewer is an optional dependency:
# set package_available to TRUE if the package is installed, else to FALSE.
# If the package is not installed yet, try install.packages("RColorBrewer").
# requireNamespace() only checks for the package and stays quiet; the pasted
# console output that used to follow this call was not R code and is removed.
package_available <- requireNamespace("RColorBrewer", quietly = TRUE)

plot_color <- c("red2", "gold", "darkgreen", "deepskyblue", "blueviolet") # fallback colors
if (package_available) {
  # brewer.pal() creates a colour palette, but "BuGn" is only defined for
  # 3 to 9 colours and the function warns outside that range. Ask for a
  # size it supports, then trim or interpolate to the number required.
  palette_size <- min(max(color_count, 3L), 9L)
  plot_color <- RColorBrewer::brewer.pal(palette_size, "BuGn")
  if (color_count > palette_size) {
    plot_color <- grDevices::colorRampPalette(plot_color)(color_count)
  } else {
    plot_color <- plot_color[seq_len(color_count)]
  }
  # if the package is not available, the fallback colors stay in use
}

# plot() on the table draws the mosaic plot.
plot(data_table,
     col = plot_color,
     main = "Nluu - Word origin ratio per speaker",
     xlab = "Speakers' initials",
     ylab = "Languages of origin")