library(tm)
## Loading required package: NLP
library(stringi)
library(quanteda)
## Package version: 4.3.1
## Unicode version: 15.1
## ICU version: 74.1
## Parallel computing: 18 of 18 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:tm':
## 
##     stopwords
## The following objects are masked from 'package:NLP':
## 
##     meta, meta<-
library(quanteda.textstats)
# ---- Build the n-gram frequency tables consumed by predict_top3() ----

# Read the three corpus files. `skipNul = TRUE` drops embedded NUL bytes
# instead of cutting the affected line short at the first NUL.
# NOTE(review): `encoding = "UTF-8"` assumes the files are UTF-8 - confirm.
blogs <- readLines("en_US.blogs.txt", warn = FALSE, encoding = "UTF-8",
                   skipNul = TRUE)
news <- readLines("en_US.news.txt", warn = FALSE, encoding = "UTF-8",
                  skipNul = TRUE)
twitter <- readLines("en_US.twitter.txt", warn = FALSE, encoding = "UTF-8",
                     skipNul = TRUE)

# Fixed seed so the same lines are drawn on every run.
set.seed(123)

# Draw up to `lines_per_source` lines from each source. Capping at the number
# of available lines keeps sample() from erroring on a short file; for files
# with at least `lines_per_source` lines the draw is unchanged.
lines_per_source <- 3000
sample_data <- c(
  sample(blogs, min(lines_per_source, length(blogs))),
  sample(news, min(lines_per_source, length(news))),
  sample(twitter, min(lines_per_source, length(twitter)))
)

# Tokenise, dropping punctuation and numbers. Stopwords are kept on purpose:
# removing them before building n-grams would join words that were never
# adjacent in the text and would make any context containing a stopword
# (e.g. "i love") impossible to look up.
tokens_data <- tokens(sample_data, remove_punct = TRUE, remove_numbers = TRUE)

# Two- and three-word sequences; quanteda joins the words of each n-gram
# with "_" by default (e.g. "i_love_you").
bigram <- tokens_ngrams(tokens_data, n = 2)
trigram <- tokens_ngrams(tokens_data, n = 3)

# Document-feature matrices (dfm() lower-cases features by default).
bigram_dfm <- dfm(bigram)
trigram_dfm <- dfm(trigram)

# One row per n-gram with its overall frequency.
bigram_freq <- textstat_frequency(bigram_dfm)
trigram_freq <- textstat_frequency(trigram_dfm)
#' Predict up to three likely next words for a piece of text
#'
#' Uses the global n-gram tables `trigram_freq` and `bigram_freq`, whose
#' `feature` column holds n-grams with words joined by "_" (for example
#' "i_love_you"). The last two words of `text` are looked up as a trigram
#' prefix, then the last word as a bigram prefix; the hard-coded fallback
#' words fill any remaining slots.
#'
#' Candidates are taken in table order, so the tables are assumed to be
#' sorted by descending frequency (TODO confirm for other table sources).
#'
#' @param text A single string of context words; case is ignored.
#' @return A character vector of three candidate next words. The fallback
#'   `c("the", "and", "to")` is returned when `text` is empty, `NA`, not of
#'   length one, or matches nothing.
predict_top3 <- function(text) {
  fallback <- c("the", "and", "to")

  text <- tolower(as.character(text))
  if (length(text) != 1L || is.na(text)) {
    return(fallback)
  }

  # Split on whitespace (and "_", the n-gram separator), then strip
  # punctuation from the ends of each word so "love," is looked up as "love".
  words <- strsplit(text, "[[:space:]_]+")[[1]]
  words <- gsub("^[[:punct:]]+|[[:punct:]]+$", "", words)
  words <- words[nzchar(words)]
  if (length(words) == 0L) {
    return(fallback)
  }

  # Words that follow the last `n_context` input words in `freq_table`.
  next_words <- function(freq_table, n_context) {
    if (length(words) < n_context) {
      return(character(0))
    }
    # The trailing "_" anchors the match on a whole word; startsWith() is a
    # literal comparison, so regex metacharacters in the input are harmless.
    prefix <- paste0(paste(tail(words, n_context), collapse = "_"), "_")
    features <- as.character(freq_table[["feature"]])
    hits <- features[startsWith(features, prefix)]
    substring(hits, nchar(prefix) + 1L)
  }

  # Prefer trigram evidence, back off to bigrams, then to the fallback.
  candidates <- c(
    next_words(trigram_freq, 2L),
    next_words(bigram_freq, 1L),
    fallback
  )
  candidates <- unique(candidates[nzchar(candidates)])
  head(candidates, 3L)
}
# Example call. The output line below was recorded when this document was
# rendered; it equals the hard-coded fallback vector in predict_top3(), which
# indicates that neither the trigram nor the bigram lookup matched.
predict_top3("i love")
## [1] "the" "and" "to"

Additional Improvement

The model was further improved to return the top three predicted words instead of a single prediction. This gives users more options and improves the usability of the model.

# Same example call as earlier in the document, repeated to illustrate the
# three-word result. The output line below was recorded at render time and
# equals the function's hard-coded fallback vector.
predict_top3("i love")
## [1] "the" "and" "to"