by Deadpool
6th of December, 2018
Predictograph is an algorithm that predicts the next word from the two words you provide. Its main features are:
More on the underlying learning principle: https://en.wikipedia.org/wiki/Katz%27s_back-off_model
Line to be finished [text]: 2-word phrase that should be completed by the algorithm
Style [drop-down menu]: different users have different styles of communication, thus we formed different styles (formal, mixed, informal) to match our users' preferences
Weight [slider]: predictions made by the simpler (bigram back-off) model tend to be less accurate, so users can move the slider to make the correction applied to those predictions more or less strict
# Predict the next word for a phrase from trigram counts, backing off to
# bigram counts when fewer than three trigrams match.
#
# Arguments:
#   inp      Phrase to complete (character). It is lower-cased, stripped of
#            punctuation and split on whitespace; the last two words drive
#            the trigram lookup and the last word drives the bigram back-off.
#   data3    Data frame of trigrams with a `feature` column (words joined by
#            "_", e.g. "i_am_a") and a numeric `frequency` column.
#   data2    Data frame of bigrams with the same two columns.
#   mutation Scale applied to bigram relative frequencies in place of the 100
#            used for trigrams, so back-off predictions can be down-weighted.
#
# Returns (invisibly, after printing it) a data frame with columns `target`
# and `probability_percent`. Trigram results hold the first three matching
# rows; back-off results always hold three rows, padded with "no_data" / 0
# when fewer than three bigrams match.
#
# NOTE(review): rows are taken in the order they appear in data3 / data2, so
# the tables are presumably pre-sorted by decreasing frequency -- confirm.
FindTrigram7 <- function(inp, data3, data2, mutation = 50){
  n_top <- 3L

  # Rows of `data` whose feature starts with the given words plus "_".
  # A literal prefix test is used so no regex is built from user input.
  match_prefix <- function(data, prefix_words) {
    prefix <- paste0(paste(prefix_words, collapse = "_"), "_")
    data[which(startsWith(as.character(data$feature), prefix)), ]
  }

  # First `n_top` matches with the word at `target_pos` of each feature and
  # its share of the total matched frequency multiplied by `scale`.
  summarise_top <- function(hits, target_pos, scale) {
    top <- hits[seq_len(min(n_top, nrow(hits))), ]
    pieces <- strsplit(as.character(top$feature), "_", fixed = TRUE)
    top$probability_percent <-
      round((top$frequency / sum(hits$frequency)) * scale, 2)
    top$target <- vapply(pieces, function(p) p[target_pos], character(1))
    top[, c("target", "probability_percent")]
  }

  # Fill the result up to `n_top` rows with "no_data" placeholders.
  pad_to_top <- function(top) {
    n_missing <- n_top - nrow(top)
    if (n_missing <= 0) {
      return(top)
    }
    filler <- data.frame(
      target = rep("no_data", n_missing),
      probability_percent = rep(0, n_missing),
      stringsAsFactors = FALSE
    )
    if (nrow(top) == 0) {
      return(filler)
    }
    rbind(top, filler)
  }

  # Normalise the phrase; a missing or empty input simply yields no words.
  phrase <- if (length(inp) > 0) as.character(inp)[1] else ""
  if (is.na(phrase)) {
    phrase <- ""
  }
  phrase <- gsub("[[:punct:]]", "", tolower(phrase))
  words <- strsplit(trimws(phrase), "[[:space:]]+")[[1]]
  words <- words[nzchar(words)]
  n_words <- length(words)

  result <- NULL

  # Trigram lookup: only trusted when at least three continuations exist.
  if (n_words >= 2) {
    tri_hits <- match_prefix(data3, words[c(n_words - 1L, n_words)])
    if (nrow(tri_hits) >= n_top) {
      result <- summarise_top(tri_hits, 3L, 100)
    }
  }

  # Back-off: bigrams that start with the last word, weighted by `mutation`.
  if (is.null(result)) {
    if (n_words >= 1) {
      bi_hits <- match_prefix(data2, words[n_words])
      result <- pad_to_top(summarise_top(bi_hits, 2L, mutation))
    } else {
      result <- pad_to_top(data.frame(
        target = character(0),
        probability_percent = numeric(0),
        stringsAsFactors = FALSE
      ))
    }
  }

  print(result)
}
Find the app here: https://deadpool.shinyapps.io/Predictograph/