- Loading and cleaning the data
I downloaded the dataset from here
Replace all non-alphanumeric characters with a space;
Remove excess whitespace;
# Clean the raw text and build a tm corpus from it.
#
# Reads:  `dataset` (defined before this point in the file).
# Writes: `dataset` (output of sent_detect) and `body` (the cleaned VCorpus).

# NOTE(review): `sent_detect` is not defined in this view; presumably it
# splits the text into sentences -- confirm which package provides it.
dataset <- sent_detect(dataset, language = "en", model = NULL)

# Wrap the text in a volatile corpus, one document per element of `dataset`.
body <- VCorpus(VectorSource(dataset))

# Remove digits.
body <- tm_map(body, removeNumbers)

# Lowercase all content. `tolower` is a plain base function, so it has to be
# wrapped in content_transformer(); otherwise tm_map() replaces each document
# with a bare character vector and later corpus operations break.
body <- tm_map(body, content_transformer(tolower))

# Remove punctuation / special characters.
# NOTE(review): removePunctuation deletes the characters rather than
# substituting a space -- confirm this matches the intended cleaning rule.
body <- tm_map(body, removePunctuation)

# Collapse runs of whitespace last, so that gaps left behind by the removed
# digits and punctuation are squeezed as well.
body <- tm_map(body, stripWhitespace)