Loading Necessary Libraries

library(NLP)
library(tm)
library(fpc)
library(RColorBrewer)
library(ggplot2)
library(stringi)
library(RWeka)
library(data.table)

Reading The Table

# Load the n-gram frequency table from disk into the global `ngramtable`,
# then key it on the word columns followed by freq so the prediction
# functions can subset it with keyed lookups.
ngramtable <- fread(
        "C:/Data Science/R/Coursera Capstone Project/Final Project Submission/ngrams.txt"
)
setkeyv(
        ngramtable,
        cols = c("word0", "word1", "word2", "word3", "freq")
)

Cleaning The Text

# Normalise raw input text before it is split into words.
#
# Args:
#   text: character vector of raw text.
#
# Returns:
#   A character vector the same length as `text`: lower-cased, with every
#   character that is neither a Unicode letter nor whitespace removed,
#   whitespace runs collapsed to a single space, and leading/trailing
#   whitespace trimmed.
cleanthetext <- function(text){
        cleanedtext <- tolower(text)
        # Strip non-letters first: removing them can leave adjacent spaces
        # behind (e.g. "a , b" -> "a  b"), so whitespace must be collapsed
        # afterwards, not before.
        cleanedtext <- gsub("[^\\p{L}\\s]+", "", cleanedtext, perl = TRUE)
        cleanedtext <- gsub("[[:space:]]+", " ", cleanedtext)
        # Trim the ends so that splitting on spaces never yields an empty
        # leading token.
        cleanedtext <- trimws(cleanedtext)
        return(cleanedtext)
}

Splitting The Words In The Text

# Clean the input text and break it into individual words.
#
# Args:
#   text: character vector of raw text; passed through cleanthetext() first.
#
# Returns:
#   A character vector of the words in `text`, in order. Empty tokens (which
#   would otherwise arise from leading or repeated whitespace) are dropped so
#   that callers never look up "" as a word.
splittheword <- function(text){
        cleanedtext <- cleanthetext(text)
        # Split on runs of whitespace rather than on a single space.
        splittedtext <- unlist(strsplit(cleanedtext, "\\s+"))
        splittedtext <- splittedtext[nzchar(splittedtext)]
        return(splittedtext)
}

Single Word Prediction

# Predict the next word from a single preceding word.
#
# Performs a keyed lookup on the global `ngramtable` with the values
# ("-", cleanedtextlist[1]), drops rows whose word2 is "-", sorts the rest by
# descending freq and removes duplicate (word1, word2) pairs. The top word2
# is the guess, the second one the alternate.
# NOTE(review): "-" looks like the table's placeholder for an unused word
# slot -- confirm against the code that builds ngrams.txt.
#
# Args:
#   cleanedtextlist: character vector of cleaned words; only element 1 is
#     used.
#
# Returns:
#   Character vector of length 2, c(guess, alternate). `guess` is
#   "Unable to predict the next word" when no candidate exists; `alternate`
#   is "" when there is no second candidate.
singlewordprediction <- function(cleanedtextlist){
        datatable <- ngramtable[list("-", cleanedtextlist[1])]
        datatable <- datatable[datatable$word2 != "-", ]
        datatable <- datatable[order(datatable$freq, decreasing = TRUE), ]

        duplicate <- duplicated(subset(datatable, select = c("word1", "word2")))
        datatable <- datatable[!duplicate, ]

        guessprediction <- datatable$word2[1]
        # is.null() must come first and short-circuit: is.na(NULL) is
        # logical(0), which is not a valid `if` condition.
        if (is.null(guessprediction) || is.na(guessprediction)) {
                guessprediction <- "Unable to predict the next word"
        }

        # nrow(), not length(): length() of a data.table is its column
        # count, which made the alternate NA whenever fewer than two
        # candidate rows existed.
        alternateprediction <- ""
        if (nrow(datatable) > 1) {
                alternateprediction <- datatable$word2[2]
        }
        return(c(guessprediction, alternateprediction))
}

Two Words Prediction

# Predict the next word from the two preceding words.
#
# Performs a keyed lookup on the global `ngramtable` with the values
# ("-", cleanedtextlist[1], cleanedtextlist[2]), drops rows whose word3 is
# "-", sorts the rest by descending freq and removes duplicate
# (word1, word2, word3) triples. If nothing matches, backs off to
# singlewordprediction() on the last word.
#
# Args:
#   cleanedtextlist: character vector of cleaned words; elements 1 and 2 are
#     used.
#
# Returns:
#   Character vector c(guess, alternate). `alternate` is "" when the lookup
#   matched but produced no second candidate.
twowordprediction <- function(cleanedtextlist){
        datatable <- ngramtable[list("-", cleanedtextlist[1], cleanedtextlist[2])]
        datatable <- datatable[datatable$word3 != "-", ]
        datatable <- datatable[order(datatable$freq, decreasing = TRUE), ]

        duplicate <- duplicated(subset(datatable,
                                       select = c("word1", "word2", "word3")))
        datatable <- datatable[!duplicate, ]

        guessprediction <- datatable$word3[1]
        # is.null() first with ||: is.na(NULL) is logical(0) and would make
        # the `if` condition zero-length.
        if (is.null(guessprediction) || is.na(guessprediction)) {
                # Return the back-off result as-is. Wrapping it in
                # c(..., alternate) would append a third element to what is
                # already a (guess, alternate) vector.
                return(singlewordprediction(cleanedtextlist[2]))
        }

        # nrow(), not length(): length() of a data.table counts columns.
        alternateprediction <- ""
        if (nrow(datatable) > 1) {
                alternateprediction <- datatable$word3[2]
        }
        return(c(guessprediction, alternateprediction))
}

Three Words Prediction

# Predict the next word from the three preceding words.
#
# Performs a keyed lookup on the global `ngramtable` with the values
# ("-", cleanedtextlist[1], cleanedtextlist[2], cleanedtextlist[3]), drops
# rows whose word4 is "-", sorts the rest by descending freq and removes
# duplicate (word1, word2, word3, word4) rows. If nothing matches, backs off
# to twowordprediction() on the last two words.
# NOTE(review): word4 is not among the key columns set on ngramtable
# (word0..word3, freq) -- confirm the table really carries a word4 column.
#
# Args:
#   cleanedtextlist: character vector of cleaned words; elements 1 to 3 are
#     used.
#
# Returns:
#   Character vector of length 2, c(guess, alternate). `alternate` is ""
#   when the lookup matched but produced no second candidate.
threewordprediction <- function(cleanedtextlist){
        datatable <- ngramtable[list("-", cleanedtextlist[1], cleanedtextlist[2],
                                     cleanedtextlist[3])]
        datatable <- datatable[datatable$word4 != "-", ]
        datatable <- datatable[order(datatable$freq, decreasing = TRUE), ]

        duplicate <- duplicated(subset(datatable, select =
                                             c("word1", "word2", "word3", "word4")))
        datatable <- datatable[!duplicate, ]

        guessprediction <- datatable$word4[1]
        # is.null() first with ||: is.na(NULL) is logical(0) and would make
        # the `if` condition zero-length.
        if (is.null(guessprediction) || is.na(guessprediction)) {
                # Back off to the last two words. twowordprediction() itself
                # falls back to the last single word, so no further fallback
                # is needed here. The result is a multi-element vector, so it
                # is returned directly instead of being tested in an `if`
                # (a length > 1 condition is an error in R >= 4.2).
                shortlist <- c(cleanedtextlist[2], cleanedtextlist[3])
                fallback <- twowordprediction(shortlist)
                # Keep only (guess, alternate) so the return shape is stable.
                return(head(fallback, 2))
        }

        # nrow(), not length(): length() of a data.table counts columns.
        alternateprediction <- ""
        if (nrow(datatable) > 1) {
                alternateprediction <- datatable$word4[2]
        }
        return(c(guessprediction, alternateprediction))
}