Intelligent Text Predictor

Shinya Hashimoto

2024-04-03

Intelligent Text Predictor

Introduction

The Shiny App

Access and Interface

Functionality

The Algorithm Behind

Overview

Data Source

App Demonstration

Test Cases

Evaluation and Improvements

User Experience

Future Directions

Conclusion

The Shiny App: Algorithm Explanation and Usage

Algorithm Explanation

Usage

ui.R

library(shiny)

# Shiny UI: a sidebar with a free-text phrase box and a "predict" button, and
# a main panel that shows the table of predicted next words.
ui <- fluidPage(
  titlePanel("Next Word Prediction"),
  sidebarLayout(
    sidebarPanel(
      # The hint is supplied as `placeholder` rather than `value`, so it is
      # shown greyed-out in the empty box and is never submitted as if it
      # were the user's phrase.
      textInput(
        "phrase",
        "Enter a phrase:",
        value = "",
        placeholder = "Type your phrase here"
      ),
      actionButton("predict", "Predict Next Word")
    ),
    mainPanel(
      tableOutput("prediction")
    )
  )
)

server.R

library(stringr)
library(dplyr)
library(readr)
library(shiny)
library(stringr)

# N-gram frequency table read once at startup. predict_next_word_ngram() reads
# its `term` and `freq` columns (presumably trigrams, going by the file name).
ngrams_df <- readRDS("./trigram.rds")

# Load a list of profanity words from an external source.
# The download is best-effort: if the URL cannot be reached, the app still
# starts, with an empty list (no profanity filtering) and a warning.
profanity_url <- "https://www.cs.cmu.edu/~biglou/resources/bad-words.txt"
profanity <- tryCatch(
  readLines(profanity_url, warn = FALSE),
  error = function(e) {
    warning(
      "Could not load profanity list from ", profanity_url, ": ",
      conditionMessage(e),
      call. = FALSE
    )
    character(0)
  }
)

# Predict candidate next words for a sentence fragment from an n-gram table.
#
# The fragment is normalised (non-ASCII characters, URLs, punctuation and
# digits removed; lowercased; whitespace collapsed), profanity words are
# dropped, and the last two remaining words are used as a prefix to look up
# matching entries in `ngrams_df`.
#
# Arguments:
#   sentence_fragment  A single character string typed by the user.
#   ngrams_df          A data frame with a `term` column (space-separated
#                      n-gram text) and a `freq` column (numeric frequency).
#   profanity          A character vector of words to drop from the input
#                      (compared case-insensitively).
#
# Returns:
#   A data frame with columns `next_words` and `frequencies`, ordered by
#   decreasing frequency. When fewer than two usable words remain or no
#   n-gram matches, a single row with next_words = "No prediction available"
#   and frequencies = NA.
predict_next_word_ngram <- function(sentence_fragment, ngrams_df, profanity) {
  no_prediction <- data.frame(
    next_words = "No prediction available",
    frequencies = NA
  )

  # Anything other than one non-missing string cannot be tokenised.
  if (length(sentence_fragment) != 1L || is.na(sentence_fragment)) {
    return(no_prediction)
  }

  # Drop characters that cannot be represented in ASCII.
  cleaned <- iconv(sentence_fragment, "latin1", "ASCII", sub = "")
  if (is.na(cleaned)) {
    return(no_prediction)
  }
  # Remove URLs
  cleaned <- gsub("http[[:alnum:][:punct:]]*", "", cleaned)
  # Remove all punctuation
  cleaned <- gsub("[[:punct:]]", "", cleaned)
  # Remove all digits
  cleaned <- gsub("[[:digit:]]", "", cleaned)
  # Convert all text to lowercase to ensure uniformity
  cleaned <- tolower(cleaned)
  # Remove extra spaces
  cleaned <- trimws(gsub("[[:space:]]+", " ", cleaned))

  words <- strsplit(cleaned, " ", fixed = TRUE)[[1]]
  # Remove profanity words. This runs after lowercasing so that capitalised
  # input still matches, and the filtered words are what is used below.
  words <- words[nzchar(words) & !(words %in% tolower(profanity))]

  n <- length(words)
  if (n < 2L) {
    return(no_prediction)
  }

  # The trailing space anchors the prefix on a word boundary, so "of the"
  # does not match "of them ...". startsWith() compares literally, so no
  # character of the user's input is interpreted as a regular expression.
  prefix <- paste0(words[n - 1L], " ", words[n], " ")
  terms <- as.character(ngrams_df$term)
  hits <- which(startsWith(terms, prefix))
  if (length(hits) == 0L) {
    return(no_prediction)
  }

  # Most frequent first; order() on the negated values keeps ties in their
  # original table order.
  hits <- hits[order(-ngrams_df$freq[hits])]

  data.frame(
    # The predicted word is whatever follows the last whitespace in the term.
    next_words = sub("^.*[[:space:]]", "", terms[hits]),
    frequencies = ngrams_df$freq[hits]
  )
}


# Shiny server logic: each time the "Predict Next Word" button is pressed,
# run the n-gram predictor on the current phrase and show the result table.
server <- function(input, output) {
  # eventReactive() recomputes only when the button is clicked; the phrase is
  # read without taking a reactive dependency on it, so typing alone does not
  # trigger a prediction. Nothing is shown until the first click.
  prediction_df <- eventReactive(input$predict, {
    predict_next_word_ngram(input$phrase, ngrams_df, profanity)
  })

  # The output is registered once at the top level of the server function
  # instead of being re-assigned inside an observer on every click.
  output$prediction <- renderTable({
    prediction_df()
  })
}