Text Mining
Tokenizing
library(tm)
## Loading required package: NLP
Text=c("We will never know the real answer, before you try")
MC_tokenizer(Text)
## [1] "We" "will" "never" "know" "the" "real" "answer"
## [8] "" "before" "you" "try"
scan_tokenizer(Text)
## [1] "We" "will" "never" "know" "the" "real" "answer,"
## [8] "before" "you" "try"
strsplit_space_tokenizer <- function(x) unlist(strsplit(x, "[[:space:]]+"))
strsplit_space_tokenizer(Text)
## [1] "We" "will" "never" "know" "the" "real" "answer,"
## [8] "before" "you" "try"
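Note the difference in the outputs above: MC_tokenizer strips punctuation (which is why an empty token appears where the comma was), while scan_tokenizer keeps the comma attached to "answer,". Any of these tokenizers can also be supplied to a term-document matrix through the tokenize control option. A minimal sketch, assuming the crude corpus that ships with tm:
library(tm)
data("crude")                                   # 20 Reuters documents included in tm
# Pass a tokenizer function via the control list
tdm <- TermDocumentMatrix(crude, control = list(tokenize = scan_tokenizer))
inspect(tdm[1:5, 1:3])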
n-Gram Tokenizing
library(tm)
# If JAVA_HOME points to an incompatible installation, clear it so rJava can locate Java itself
if (Sys.getenv("JAVA_HOME") != "")
  Sys.setenv(JAVA_HOME = "")
# A Java runtime must be installed on the system for rJava and RWeka to load
library(rJava)
library(RWeka)
Text=c("We will never know the real answer, before you try")
NGramTokenizer(Text, Weka_control(min = 2, max = 2))
## [1] "We will" "will never" "never know" "know the"
## [5] "the real" "real answer" "answer before" "before you"
## [9] "you try"
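The same tokenizer is typically passed to a term-document matrix, e.g. control = list(tokenize = function(x) NGramTokenizer(x, Weka_control(min = 2, max = 2))). If installing Java for rJava/RWeka is not an option, bigrams can also be built with the NLP package (already loaded by tm). A minimal sketch, reusing scan_tokenizer and the Text object from above:
library(NLP)
tokens <- scan_tokenizer(Text)
# ngrams() returns a list of length-2 token vectors; paste them back into bigram strings
bigrams <- vapply(ngrams(tokens, 2L), paste, character(1L), collapse = " ")
bigrams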