scan_clean()

Markus — Jun 16, 2013, 10:35 PM

# Reads a text file exported from Toolbox and reduces it to its upper-case
# letter tokens.
#
# Arguments:
#   path  Path to the text file (as in scan_clean("dataset.txt")).
#
# Returns:
#   A character vector with one entry per whitespace-separated token of the
#   file, stripped of every character except the letters A-Z. Tokens that are
#   empty after stripping are dropped.
scan_clean <- function(path) {
  tokens <- scan(
    file = path,
    what = "character",         # read whitespace-separated tokens as strings
    na.strings = character(0),  # keep a literal "NA" token as text, not as NA
    quiet = TRUE,               # do not print the number of items read
    blank.lines.skip = TRUE     # ignore blank lines in the data file
  )
  # Delete every character that is not (^) an upper-case letter (A-Z).
  # See ?regex for regular expressions, character classes and metacharacters.
  cleaned <- gsub(pattern = "[^A-Z]", replacement = "", x = tokens)
  # Drop tokens that contained no upper-case letter at all.
  cleaned[nzchar(cleaned)]
}
# Count how often each cleaned token occurs in the word list ...
my_table <- table(scan_clean(path = "wordlist_so_1.txt"))

# ... and show the counts as a pie chart with one colour per slice.
pie(
  x = my_table,
  col = c("red2", "darkgreen", "gold"),
  main = "A pie chart"
)

plot of chunk unnamed-chunk-1

# Also try col = brewer.pal(length(my_table), "Greens"); brewer.pal() comes from the RColorBrewer package.