# Set the NOAWT environment variable before any package is loaded.
# NOTE(review): presumably this keeps Java-backed packages from starting AWT
# (a known workaround on macOS) -- confirm it is still needed.
Sys.setenv(NOAWT = TRUE)

# library() stops with an error when the package is not installed; require()
# would only return FALSE and let the script fail later at Corpus().
library(tm)
## Loading required package: NLP

# Build the corpus from the files found in the Federalist Papers directory.
my.corpus1 <- Corpus(DirSource("~/CLIO-3/federalist_papers"))

# Call the function (note the parentheses) to list the transformations that
# tm_map() can apply; the bare name would only print the function's source.
# Known values: removeNumbers, removePunctuation, removeWords, stemDocument,
# stripWhitespace.
getTransformations()
# stemDocument() relies on the Snowball stemmers; load the package up front so
# a missing installation fails here instead of part-way through the pipeline.
library(SnowballC)

# Project-specific words to drop in addition to the standard English stopwords.
my.stops1 <- c("history", "clio", "programming")
# Alternatively, read the custom stopwords from a file:
# my.list1 <- unlist(read.table("PATH TO STOPWORD FILE", stringsAsFactors = FALSE))
# my.stops1 <- c(my.list1)

# 1. Lower-case first: removeWords() matches case-sensitively and the stopword
#    lists are lower case, so "The" or "And" would otherwise survive.
my.corpus1 <- tm_map(my.corpus1, content_transformer(tolower))

# 2. Remove stopwords while punctuation is still present, so contractions in
#    the stopword list (e.g. "don't") still match the text.
my.corpus1 <- tm_map(my.corpus1, removeWords, stopwords("english"))
my.corpus1 <- tm_map(my.corpus1, removeWords, my.stops1)

# 3. Strip punctuation and digits from what is left.
my.corpus1 <- tm_map(my.corpus1, removePunctuation)
my.corpus1 <- tm_map(my.corpus1, removeNumbers)

# 4. Reduce the remaining words to their stems.
my.corpus1 <- tm_map(my.corpus1, stemDocument)

# 5. Collapse the runs of blanks left behind by the removals above.
my.corpus1 <- tm_map(my.corpus1, stripWhitespace)
# ---- Optional analysis steps (disabled; uncomment the ones you need) ----

# Build a term-document matrix from the cleaned corpus and print it.
#my.tdm1 <- TermDocumentMatrix(my.corpus1)
#inspect(my.tdm1)
# Build a document-term matrix with TF-IDF weighting and print it.
#my.dtm1 <- DocumentTermMatrix(my.corpus1, control = list(weighting = weightTfIdf, stopwords = TRUE))
#inspect(my.dtm1)

# magrittr supplies the %>% pipe used by the disabled viewer snippets below.
library(magrittr)
# Open the term-document matrix in the data viewer.
#my.tdm1 %>%
# inspect() %>%
#as.data.frame() %>%
# View()
# Open the document-term matrix in the data viewer.
#my.dtm1 %>%
#inspect() %>%
#as.data.frame() %>%
#View()

# Drop sparse terms, then list frequent terms and terms associated with 'mine'.
#my.tdm1 <- removeSparseTerms(my.tdm1, 0.2)
#findFreqTerms(my.tdm1, 2)
#findAssocs(my.tdm1, 'mine', 0.20)

# Hierarchical clustering on the scaled term-document matrix
# (Euclidean distance, Ward linkage), followed by a dendrogram plot.
# NOTE(review): depending on the tm version, inspect() may return only a
# sample of the matrix; as.matrix(my.tdm1) is the usual full conversion --
# confirm before re-enabling.
#my.df1 <- as.data.frame(inspect(my.tdm1))
#my.df1.scale <- scale(my.df1)
#d1 <- dist(my.df1.scale,method="euclidean")
#fit1 <- hclust(d1, method="ward.D")
#plot(fit1)

# Same clustering applied to the document-term matrix.
#my.df1 <- as.data.frame(inspect(my.dtm1))
#my.df1.scale <- scale(my.df1)
#d1 <- dist(my.df1.scale,method="euclidean")
#fit <- hclust(d1, method="ward.D")
#plot(fit)