# Getting ready

library(tm)
library(ggplot2)
library(wordcloud)
library(dplyr)
library(plotly)

# Define directory

# Folder holding the source documents that the corpus is built from.
directory <- "~/mises"

# Corpus

# Read every file found in `directory` with tm's PDF reader and collect the
# documents into a corpus.
corpus <- Corpus(
  DirSource(directory),
  readerControl = list(
    reader = readPDF(),
    language = "en",
    encoding = "UTF-8"
  )
)

# Normalise the text: strip punctuation and digits, lower-case everything,
# then drop English stop words.
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
# tolower() is a plain character function, not a tm transformation; wrapping
# it in content_transformer() changes only the document content, so every
# element stays a text document and keeps its metadata. This replaces the
# former tm_map(corpus, PlainTextDocument) step, which re-created the
# documents after the fact and discarded their metadata.
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removeWords, stopwords(kind = "en"))

# Term matrix

# Build the document-term matrix from the cleaned corpus and prune sparse
# terms in a single step.
# NOTE(review): 0.01 is the maximal allowed sparsity, which looks very strict
# (only terms present in almost every document would survive) -- confirm that
# a value such as 0.99 was not intended.
term_mat <- removeSparseTerms(DocumentTermMatrix(corpus), 0.01)

# Total number of occurrences of each term, summed over all documents.
# as.matrix() converts the document-term matrix to a dense matrix so that
# colSums() can add up each term column.
term_totals <- colSums(as.matrix(term_mat))

# One row per term with its overall frequency. `term` is built directly as a
# character column (the former `fre_word$terms = levels(fre_word$terms)` line
# referred to a column that does not exist and therefore did nothing), and
# the frequencies are unnamed so the data frame gets default row names.
fre_word <- data.frame(
  term = as.character(names(term_totals)),
  frequency = unname(term_totals),
  stringsAsFactors = FALSE
)

# Plots

# Subsets of the frequency table: terms seen more than 80 times feed the word
# cloud, terms seen more than 135 times feed the bar chart.
over80 <- filter(fre_word, frequency > 80)
over135 <- filter(fre_word, frequency > 135)

# Interactive bar chart of the most frequent terms, with bars shaded by their
# frequency.
bar_chart <- ggplot(over135) +
  geom_bar(
    aes(term, frequency, fill = frequency),
    position = "dodge",
    stat = "identity"
  ) +
  theme_minimal()
ggplotly(bar_chart)

# Word cloud of the broader term set, coloured with the 9-colour "Paired"
# palette.
palette <- brewer.pal(n = 9, "Paired")
wordcloud(
  words = over80$term,
  freq = over80$frequency,
  colors = palette
)