library(ngram)
# We process the corpus and compute its trigrams using the ngram package.
# Small example corpus made of two short sentences.
corpus <- c(
  "This is some sample text for a demo",
  "This is some more text here"
)

# Build the trigram (n = 3) object from the corpus, splitting words on spaces.
trigrams_text <- ngram(
  corpus,
  n = 3,
  sep = " "
)

# Pull the phrase table out of the trigram object.
trigram_probabilities <- get.phrasetable(trigrams_text)

# Display the table; these values are the basis for choosing the most likely
# next word.
print(trigram_probabilities)
## ngrams freq prop
## 1 This is some 2 0.2
## 2 more text here 1 0.1
## 3 for a demo 1 0.1
## 4 some more text 1 0.1
## 5 is some more 1 0.1
## 6 text for a 1 0.1
## 7 is some sample 1 0.1
## 8 sample text for 1 0.1
## 9 some sample text 1 0.1