Shiny Application & Reproducible Pitch

author: Marlen Doskarin
date: 6th Apr. 2020

Description

This app generates a word cloud for a book chosen from the list, using the selected minimum word frequency and the maximum number of words to display. There are 3 books in the list.

global.R

library(tm)
library(wordcloud)
library(memoise)

# The list of valid books.
# Names are the labels shown in the UI selector; values are the file stems
# (without the ".txt" extension) that getTermMatrix() reads from the app
# directory. A plain `<-` is sufficient here: this is a top-level assignment,
# so it already binds in the environment global.R is sourced into.
books <- list("Crime-and-punishment" = "CrimeAndPunishment_by_Fyodor_Dostoevsky",
              "Notes-from-the-underground" = "NotesFromTheUnderground_by_Fyodor_Dostoevsky",
              "The-brothers-Karamazov" = "TheBrothersKaramoz_by_Fyodor_Dostoyevsky")

# Using "memoise" to automatically cache the results
#
# Compute word frequencies for one of the bundled books.
#
# book:    a single character string; must be one of the values in `books`
#          (the file stem of "./<book>.txt").
# returns: a named numeric vector of term counts, sorted in decreasing order
#          (names are the terms).
# errors:  stops with "Unknown book" when `book` is not a single known value.
getTermMatrix <- memoise(function(book) {
    # Careful not to let just any name slip in here; a
    # malicious user could manipulate this value.
    # The value is interpolated into a file path below, so only accept a
    # single string that is on the whitelist (also guards NULL / vectors,
    # which would otherwise make `if` fail with an unrelated error).
    is_known <- is.character(book) && length(book) == 1L &&
        book %in% unlist(books)
    if (!is_known) {
        stop("Unknown book")
    }

    text <- readLines(sprintf("./%s.txt", book),
                      encoding = "UTF-8")

    # Each line of the file becomes one document in the corpus.
    myCorpus <- Corpus(VectorSource(text))
    myCorpus <- tm_map(myCorpus, content_transformer(tolower))
    myCorpus <- tm_map(myCorpus, removePunctuation)
    myCorpus <- tm_map(myCorpus, removeNumbers)
    myCorpus <- tm_map(myCorpus, removeWords,
                       c(stopwords("SMART"), "thy", "thou", "thee", "the", "and", "but"))

    # `minWordLength` is not a recognised control option in current tm and
    # was silently ignored (falling back to the default minimum length of 3).
    # `wordLengths = c(1, Inf)` expresses the intended "keep words of any
    # length".
    myDTM <- TermDocumentMatrix(myCorpus,
                                control = list(wordLengths = c(1, Inf)))

    # NOTE(review): as.matrix() densifies a terms x lines matrix, which may be
    # large for a full novel - confirm memory use is acceptable.
    m <- as.matrix(myDTM)

    # Total count per term across all lines, most frequent first.
    sort(rowSums(m), decreasing = TRUE)
})

server.R

# Text of the books downloaded from:
# The Brothers Karamazov by Fyodor Dostoyevsky:
#  https://www.gutenberg.org/files/28054/28054-0.txt
# Crime and Punishment by Fyodor Dostoyevsky:
#  https://www.gutenberg.org/files/2554/2554-0.txt
# Notes from the Underground by Fyodor Dostoyevsky:
#  https://www.gutenberg.org/ebooks/600.txt.utf-8

# Shiny server function.
#
# Reads input$selection (book), input$update (action button), input$freq and
# input$max (sliders), and renders the word cloud into output$plot.
# The function was previously defined twice, verbatim; only the last
# expression of server.R is used as the server, so a single definition
# suffices.
function(input, output, session) {
    # Define a reactive expression for the document term matrix
    terms <- reactive({
        # Change when the "update" button is pressed...
        input$update
        # ...but not for anything else
        isolate({
            withProgress({
                setProgress(message = "Processing corpus...")
                getTermMatrix(input$selection)
            })
        })
    })

    # Make the wordcloud drawing predictable during a session
    wordcloud_rep <- repeatable(wordcloud)

    output$plot <- renderPlot({
        # Named vector of term frequencies (names are the words).
        v <- terms()
        wordcloud_rep(names(v), v, scale = c(4, 0.5),
                      min.freq = input$freq, max.words = input$max,
                      colors = brewer.pal(8, "Dark2"))
    })
}

ui.R

# Page layout: title, a sidebar holding the controls, and the plot area.
fluidPage(
    titlePanel(title = "Word Cloud"),

    sidebarLayout(
        # Controls: book selector, refresh button and the two sliders
        sidebarPanel(
            # Book choices come from the `books` list defined in global.R
            selectInput(
                inputId = "selection",
                label = "Choose a book:",
                choices = books
            ),
            actionButton(inputId = "update", label = "Change"),
            hr(),
            sliderInput(
                inputId = "freq",
                label = "Minimum Frequency:",
                min = 1,
                max = 50,
                value = 15
            ),
            sliderInput(
                inputId = "max",
                label = "Maximum Number of Words:",
                min = 1,
                max = 300,
                value = 100
            )
        ),

        # Main area: the rendered word cloud
        mainPanel(
            plotOutput(outputId = "plot")
        )
    )
)

Output

Result

Result