Load required libraries.
library(tm)
Set the working directory to the location of the script and data.
# All relative paths in this script (e.g. "./review_polarity/...") are
# resolved against this directory, so it must be set before loading data.
setwd(dir = "~/Youtube")
Load corpus from the tm package.
# Attach the "acq" example dataset to the workspace, then report how many
# documents it contains (the recorded output below shows 50).
data(list = "acq")
length(acq)
## [1] 50
Load corpus from local files.
Load the sentiment polarity dataset (version 2.0) from the Movie Review Data collection.
Once the archive is unzipped, the individual review documents can be read from the review_polarity/txt_sentoken directory.
# Root folder of the unzipped dataset, relative to the working directory.
# The trailing slash is kept so sub-folder names can be appended directly.
path <- "./review_polarity/txt_sentoken/"

# Build a directory source that also descends into sub-folders, so every
# review under `path` ends up in a single corpus. `TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
dir <- DirSource(path, encoding = "UTF-8", recursive = TRUE)
corpus <- Corpus(dir)

# Number of documents read (the recorded output below shows 2000).
length(corpus)
## [1] 2000
# Corpus restricted to the "pos/" sub-folder of the dataset root.
# paste0() concatenates without a separator, replacing paste(..., sep = "").
dir.pos <- DirSource(paste0(path, "pos/"), encoding = "UTF-8")
corpus.pos <- Corpus(dir.pos)

# Number of documents read (the recorded output below shows 1000).
length(corpus.pos)
## [1] 1000
# Corpus restricted to the "neg/" sub-folder of the dataset root.
# paste0() concatenates without a separator, replacing paste(..., sep = "").
dir.neg <- DirSource(paste0(path, "neg/"), encoding = "UTF-8")
corpus.neg <- Corpus(dir.neg)

# Number of documents read (the recorded output below shows 1000).
length(corpus.neg)
## [1] 1000