author: Marlen Doskarin date: 06th Apr. 2020
This app is to generate the cloud of words by selecting the available book from the list , frequency of words as they appear and the maximum number of words to be displayed. There are 3 books in the list.
library(tm)
library(wordcloud)
library(memoise)
# The list of valid books
books <<- list("Crime-and-punishment" = "CrimeAndPunishment_by_Fyodor_Dostoevsky",
"Notes-from-the-underground" = "NotesFromTheUnderground_by_Fyodor_Dostoevsky",
"The-brothers-Karamazov" = "TheBrothersKaramoz_by_Fyodor_Dostoyevsky")
# Using "memoise" to automatically cache the results
getTermMatrix <- memoise(function(book) {
# Careful not to let just any name slip in here; a
# malicious user could manipulate this value.
if (!(book %in% books))
stop("Unknown book")
text <- readLines(sprintf("./%s.txt", book),
encoding="UTF-8")
myCorpus = Corpus(VectorSource(text))
myCorpus = tm_map(myCorpus, content_transformer(tolower))
myCorpus = tm_map(myCorpus, removePunctuation)
myCorpus = tm_map(myCorpus, removeNumbers)
myCorpus = tm_map(myCorpus, removeWords,
c(stopwords("SMART"), "thy", "thou", "thee", "the", "and", "but"))
myDTM = TermDocumentMatrix(myCorpus,
control = list(minWordLength = 1))
m = as.matrix(myDTM)
sort(rowSums(m), decreasing = TRUE)
})
# Text of the books downloaded from:
# The Brothers Karamazov by Fyodor Dostoyevsky:
# https://www.gutenberg.org/files/28054/28054-0.txt
# Crime and Punishment by Fyodor Dostoyevsky:
# https://www.gutenberg.org/files/2554/2554-0.txt
# Notes from the Underground by Fyodor Dostoyevsky:
# https://www.gutenberg.org/ebooks/600.txt.utf-8
function(input, output, session) {
# Define a reactive expression for the document term matrix
terms <- reactive({
# Change when the "update" button is pressed...
input$update
# ...but not for anything else
isolate({
withProgress({
setProgress(message = "Processing corpus...")
getTermMatrix(input$selection)
})
})
})
# Make the wordcloud drawing predictable during a session
wordcloud_rep <- repeatable(wordcloud)
output$plot <- renderPlot({
v <- terms()
wordcloud_rep(names(v), v, scale=c(4,0.5),
min.freq = input$freq, max.words=input$max,
colors=brewer.pal(8, "Dark2"))
})
}
function(input, output, session) {
# Define a reactive expression for the document term matrix
terms <- reactive({
# Change when the "update" button is pressed...
input$update
# ...but not for anything else
isolate({
withProgress({
setProgress(message = "Processing corpus...")
getTermMatrix(input$selection)
})
})
})
# Make the wordcloud drawing predictable during a session
wordcloud_rep <- repeatable(wordcloud)
output$plot <- renderPlot({
v <- terms()
wordcloud_rep(names(v), v, scale=c(4,0.5),
min.freq = input$freq, max.words=input$max,
colors=brewer.pal(8, "Dark2"))
})
}
fluidPage(
# Application title
titlePanel("Word Cloud"),
sidebarLayout(
# Sidebar with a slider and selection inputs
sidebarPanel(
selectInput("selection", "Choose a book:",
choices = books),
actionButton("update", "Change"),
hr(),
sliderInput("freq",
"Minimum Frequency:",
min = 1, max = 50, value = 15),
sliderInput("max",
"Maximum Number of Words:",
min = 1, max = 300, value = 100)
),
# Show Word Cloud
mainPanel(
plotOutput("plot")
)
)
)
Result