N|uu Part of Speech Ratio

Markus — Jun 27, 2013, 7:11 PM

# Demo
# N|uu Part of Speech Ratio

# Count how often each part-of-speech tag (noun, particle, verb) occurs in the
# word list and show the counts as a horizontal bar plot.

path <- "wordlist_ps.txt"

# Read the file as whitespace-separated character tokens.
tokens <- scan(
  file = path,
  what = "character",      # return every token as a character string
  quote = "",              # treat ' and " as ordinary characters; scan()'s
                           # default would fuse everything between two quote
                           # marks (whitespace included) into a single token
  quiet = TRUE,            # do not print the number of items read
  blank.lines.skip = TRUE, # ignore blank lines in the data file
  encoding = "UTF-8"       # mark the strings as UTF-8 (input is not re-encoded)
)

# Strip everything except lower-case ASCII letters (punctuation, digits,
# upper-case and non-ASCII characters), e.g. "v.tr." becomes "vtr".
tags <- gsub(pattern = "[^a-z]", replacement = "", x = tokens)

# Collapse the three verb abbreviations into one common label.
tags <- gsub(pattern = "vatr|vitr|vtr", replacement = "verb", x = tags)

# Keep only tokens that are exactly one of the tags of interest:
# "^" anchors the match at the start of the string, "$" at its end.
# See ?regex for regular expressions, character classes and metacharacters.
tags <- grep(pattern = "^(verb|n|part)$", x = tags, value = TRUE)

# Fixed tag order with one colour per tag. Counting over a factor with explicit
# levels keeps every bar (even a zero count) in place, so a missing category
# can never shift the colours onto the wrong bars.
tag_colors <- c(n = "gold", part = "red3", verb = "gray5")
tag_counts <- table(factor(tags, levels = names(tag_colors)))

barplot(
  tag_counts,
  col = unname(tag_colors[names(tag_counts)]),
  xlab = "Number of matches",
  horiz = TRUE, # horizontal bars, so the counts run along the x axis
  space = 0,    # no gap between the bars
  border = NA   # no bar outlines
)

Figure: horizontal bar plot of the part-of-speech counts (plot of chunk unnamed-chunk-1)