library(tm)
library(ggplot2)
library(wordcloud)
library(dplyr)
library(plotly)

# Folder whose files are loaded into the corpus.
directory <- "~/mises"

# Build the corpus from every file DirSource() finds in `directory`,
# parsing each one with tm's PDF reader.
corpus <- Corpus(
  DirSource(directory),
  readerControl = list(
    reader = readPDF(),
    language = "en",
    encoding = "UTF-8"
  )
)
# Normalise the text of every document before terms are counted.
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
# tolower() is a plain base function, not a tm transformation; wrapping it in
# content_transformer() keeps each element a text document (with its
# metadata), so no PlainTextDocument re-wrapping step is needed afterwards.
corpus <- tm_map(corpus, content_transformer(tolower))
# Stop words are removed after lower-casing so capitalised forms match too.
corpus <- tm_map(corpus, removeWords, stopwords(kind = "en"))

# Documents in rows, terms in columns.
term_mat <- DocumentTermMatrix(corpus)
# Drop terms whose sparsity exceeds the threshold.
# NOTE(review): 0.01 looks very strict (presumably only terms present in
# almost every document survive) -- confirm this is intended and not 0.99.
term_mat <- removeSparseTerms(term_mat, 0.01)

# Total number of occurrences of each remaining term across all documents.
term_totals <- colSums(as.matrix(term_mat))

# One row per term, with default integer row names. `term` is stored as a
# character column; the original tried to do this through a misspelled
# `fre_word$terms`, which does not exist, so that line had no effect.
fre_word <- data.frame(
  term = as.character(names(term_totals)),
  frequency = unname(term_totals),
  stringsAsFactors = FALSE
)

# Terms used more than 80 times (input for the word cloud).
over80 <- fre_word %>%
  filter(frequency > 80)
# Terms used more than 135 times (input for the bar chart).
over135 <- fre_word %>%
  filter(frequency > 135)

# Interactive bar chart: one bar per term, height and fill both mapped to
# the term's frequency. geom_col() is the idiomatic form of
# geom_bar(stat = "identity").
ggplotly(
  ggplot(over135) +
    geom_col(
      aes(term, frequency, fill = frequency),
      position = "dodge"
    ) +
    theme_minimal()
)

# Nine colours from the ColorBrewer "Paired" palette for the word cloud.
palette <- brewer.pal(n = 9, "Paired")

# Word cloud of the terms used more than 80 times.
wordcloud(
  words = over80$term,
  freq = over80$frequency,
  colors = palette
)