# Markus — Jun 27, 2013, 7:11 PM
# Demo
# N|uu Part of Speech Ratio
# Count the part-of-speech tags (n, part, verb) found in a word list file and
# draw the counts as a horizontal bar chart.
path <- "wordlist_ps.txt"

# Read the data set as whitespace-separated character tokens.
# NOTE(review): the variable name `list` shadows base::list for variable
# lookup (calls such as list(...) still resolve to the function). The name is
# kept so that any code outside this section that uses it keeps working.
list <- scan(
  file = path,
  what = "character",       # read every token as a character string
  # optional:
  quiet = TRUE,             # do not print number of read items into console
  blank.lines.skip = TRUE,  # skip blank lines in data file
  encoding = "UTF-8"        # mark the strings read as UTF-8 (no re-encoding)
)

# Remove every character that is not in the range a-z.
list <- gsub(pattern = "[^a-z]", replacement = "", x = list)

# Collapse the verb sub-tags (vatr, vitr, vtr) into the single tag "verb".
list <- gsub(pattern = "vatr|vitr|vtr", replacement = "verb", x = list)

# Keep only tokens that are exactly one of the three tags.
# ^ : empty string before, $ : empty string after
list <- grep(pattern = "^(verb|n|part)$", x = list, value = TRUE)
# also see ?regex for regular expressions, character classes and metacharacters!

if (length(list) == 0L) {
  stop("no part-of-speech tags (n, part, verb) found in '", path, "'",
       call. = FALSE)
}

# One colour per tag, in the alphabetical order that table() uses for bars.
pos_tags <- c("n", "part", "verb")
colors <- c("gold", "red3", "gray5")

tag_counts <- table(list)

# Look colours up by tag name instead of by position: table() only returns
# the tags that actually occur, so a missing tag must not shift the colours
# of the remaining bars.
bar_colors <- colors[match(names(tag_counts), pos_tags)]

barplot(
  tag_counts,
  # optional:
  col = bar_colors,
  xlab = "Number of matches",  # counts run along the x axis (horiz = TRUE)
  horiz = TRUE,
  space = 0,                   # no gap between bars
  border = NA                  # no bar outlines; alternative:
                               # c("#FFFF00","#FF0000","#8899FF")
)