Shinya Hashimoto
2024-04-03
## Future Directions
library(shiny)
# Shiny UI
# Page layout: a sidebar holding the phrase text box and the "predict" button,
# and a main panel holding the "prediction" table output.
ui <- fluidPage(
  titlePanel("Next Word Prediction"),
  sidebarLayout(
    sidebarPanel(
      textInput("phrase", "Enter a phrase:", value = "Type your phrase here"),
      actionButton("predict", "Predict Next Word")
    ),
    mainPanel(
      tableOutput("prediction")
    )
  )
)
# This call was previously fused onto the closing parenthesis above
# (`)library(stringr)`), which is a parse error; it needs its own line.
library(stringr)
library(dplyr)
library(readr)
library(shiny)
library(stringr)
# N-gram frequency table, read once at startup. predict_next_word_ngram()
# reads its `term` and `freq` columns.
ngrams_df <- readRDS("./trigram.rds")
# Load a list of profanity words from an external source.
# The download is best-effort: if the URL cannot be read (no network, host
# down), warn and continue with an empty list instead of aborting app startup.
profanity_url <- "https://www.cs.cmu.edu/~biglou/resources/bad-words.txt"
profanity <- tryCatch(
  readLines(profanity_url, warn = FALSE),
  error = function(e) {
    warning(
      "Could not load the profanity list from ", profanity_url, ": ",
      conditionMessage(e),
      call. = FALSE
    )
    character(0)
  }
)
#' Predict the next word for a sentence fragment from an n-gram table
#'
#' The fragment is normalised (non-ASCII characters dropped, URLs, punctuation
#' and digits removed, lowercased), profanity words are removed, and the last
#' two remaining words are used as a prefix to look up matching terms in
#' `ngrams_df`.
#'
#' @param sentence_fragment Character string typed by the user. Only the first
#'   element is used.
#' @param ngrams_df Data frame with a `term` column (space-separated n-grams)
#'   and a `freq` column (their frequencies).
#' @param profanity Character vector of words to drop from the fragment before
#'   matching. Compared case-insensitively.
#' @return A data frame with columns `next_words` and `frequencies`, ordered by
#'   decreasing frequency. When fewer than two usable words remain or nothing
#'   matches, a single row with `next_words = "No prediction available"` and
#'   `frequencies = NA`.
predict_next_word_ngram <- function(sentence_fragment, ngrams_df, profanity) {
  no_prediction <- data.frame(
    next_words = "No prediction available",
    frequencies = NA
  )

  if (length(sentence_fragment) == 0L || is.na(sentence_fragment[1])) {
    return(no_prediction)
  }

  # Normalise the text. The URL pass must run before punctuation is stripped,
  # otherwise "http://..." would no longer be recognisable as a URL.
  cleaned <- iconv(sentence_fragment[1], "latin1", "ASCII", sub = "")
  cleaned <- gsub("http[[:alnum:][:punct:]]*", "", cleaned)
  cleaned <- gsub("[[:punct:]]", "", cleaned)
  cleaned <- gsub("[[:digit:]]", "", cleaned)
  cleaned <- tolower(cleaned)

  words <- strsplit(trimws(cleaned), "\\s+")[[1]]
  # Drop profanity only after lowercasing so the comparison is
  # case-insensitive. `%in%` (unlike setdiff) keeps repeated words and order.
  words <- words[nzchar(words) & !(words %in% tolower(profanity))]

  n <- length(words)
  if (n < 2L) {
    return(no_prediction)
  }

  # The trailing space anchors the match on a whole word, so the prefix
  # "in the" does not also match the term "in there is".
  prefix <- paste0(paste(words[(n - 1L):n], collapse = " "), " ")
  terms <- as.character(ngrams_df[["term"]])
  freqs <- ngrams_df[["freq"]]

  hits <- which(startsWith(terms, prefix))
  if (length(hits) == 0L) {
    return(no_prediction)
  }
  hits <- hits[order(freqs[hits], decreasing = TRUE)]

  data.frame(
    # Keep only the final word of each matching term.
    next_words = sub("^.*\\s", "", terms[hits]),
    frequencies = freqs[hits]
  )
}
# Shiny server logic
# Runs the next-word prediction each time the "Predict Next Word" button is clicked
# Server logic: compute the prediction table only when the "predict" button is
# clicked, not on every change to the text box.
server <- function(input, output) {
  # eventReactive() re-evaluates its body only when input$predict changes;
  # input$phrase is read inside it without becoming a trigger itself.
  prediction <- eventReactive(input$predict, {
    predict_next_word_ngram(input$phrase, ngrams_df, profanity)
  })

  # Define the output once at the top level of the server function instead of
  # re-assigning it from inside an observer on every click.
  output$prediction <- renderTable({
    prediction()
  })
}