Yinning Zhang
May 2019
# Back-off next-word lookup.
#
# Reads `inputwords` (the input split into words) and the n-gram frequency
# tables DF6grams ... DF1grams, all defined outside this section. Starting
# from the longest context the input allows, each table is queried in turn
# until one returns at least one row; the matching rows are then ranked by
# their relative frequency.
#
# Results left in the environment:
#   subdf     - the matching rows, with a `prob` column added
#   predicted - up to 10 top-ranked rows of `subdf`, last three columns only
N <- length(inputwords)

# Always start from an empty table so the nrow() checks below are valid even
# when fewer than 5 words were supplied. Previously `subdf` was only created
# inside the N >= 5 branch, so shorter input made the ranking step fail on a
# missing (or stale) `subdf`.
subdf <- data.table()

# Last (up to) 5 words. Short input is left-padded with NA so that w5 is
# always the most recent word, w4 the one before it, and so on. For N >= 5
# `w` is exactly tail(inputwords, 5), as before.
w <- tail(inputwords, 5)
if (N < 5) {
  w <- c(rep(NA_character_, 5 - N), as.character(w))
}
w1 <- w[1]; w2 <- w[2]; w3 <- w[3]; w4 <- w[4]; w5 <- w[5]

# Back off from the longest context to the shortest. Each level is tried only
# if the input has enough words for it and no longer context has matched yet.
if (N >= 5) {
  subdf <- subset(
    DF6grams,
    word1 == w1 & word2 == w2 & word3 == w3 & word4 == w4 & word5 == w5
  )
}
if (N >= 4 && nrow(subdf) == 0) {
  subdf <- subset(
    DF5grams,
    word1 == w2 & word2 == w3 & word3 == w4 & word4 == w5
  )
}
if (N >= 3 && nrow(subdf) == 0) {
  subdf <- subset(DF4grams, word1 == w3 & word2 == w4 & word3 == w5)
}
if (N >= 2 && nrow(subdf) == 0) {
  subdf <- subset(DF3grams, word1 == w4 & word2 == w5)
}
if (N >= 1 && nrow(subdf) == 0) {
  subdf <- subset(DF2grams, word1 == w5)
}
# Nothing matched (or no input at all): fall back to the first 5 unigram rows.
# NOTE(review): presumably DF1grams is sorted by descending freq - confirm.
if (nrow(subdf) == 0) {
  subdf <- head(DF1grams, 5)
}

if (nrow(subdf) > 0) {
  # Relative frequency of each candidate among the rows that matched.
  subdf$prob <- subdf$freq / sum(subdf$freq)
  # Highest probability first; keep only the last three columns (`prob` is
  # the last one when it was newly appended just above).
  # NOTE(review): the other two are presumably the predicted word and `freq`
  # - confirm against the n-gram table layout.
  predicted <- data.frame(
    subdf[order(-subdf$prob), (ncol(subdf)-2):ncol(subdf)]
  )
  # head() already caps at the number of available rows.
  predicted <- head(predicted, 10)
}