by Deadpool
10th of December, 2018
Predictograph is an algorithm that predicts the next word from the three-word phrase you provide. Its main features are:
More on the underlying learning principle: https://en.wikipedia.org/wiki/Katz%27s_back-off_model
Line to be finished [text]: 3-word phrase that should be completed by the algorithm
Style [drop-down menu]: different users have different styles of communication, thus we formed different styles (formal, mixed, informal) to match our users' preferences
Simplicity penalization [slider]: predictions from simpler (lower-order) models tend to be less accurate, so users can move the slider to make the penalty applied to those predictions more or less strict
# Predict the three most likely next words for a phrase using n-gram back-off.
#
# The phrase is lower-cased, stripped of punctuation and split into words.
# The last three words are looked up as a prefix in `data4`; if that table does
# not yield more than two candidates, the last two words are tried in `data3`,
# then the last word in `data2`. If nothing qualifies, a fixed default
# ("the", "a", "to") is returned.
#
# Arguments:
#   inp       Character string with the phrase to complete.
#   data4     Data frame with columns `feature` (words joined by "_",
#             context words followed by the predicted word) and `frequency`;
#             used with a three-word context.
#   data3     Same layout as `data4`, used with a two-word context.
#   data2     Same layout as `data4`, used with a one-word context.
#   mutation  Percentage scale applied to probabilities from `data3`
#             (default 75); `data2` uses mutation * mutation / 100, so every
#             back-off step is penalised once more.
#
# Value:
#   A three-row data frame with columns `target` and `probability_percent`.
#   The result is printed and returned invisibly.
#
# NOTE(review): the first three matching rows are reported as the top
# predictions, which assumes each table is already sorted by descending
# frequency - confirm against the code that builds the tables.
FindTrigram8 <- function(inp, data4, data3, data2, mutation = 75){
  # Normalise the phrase. Removing punctuation also removes "_", so user input
  # can never collide with the separator used inside `feature`.
  cleaned <- gsub("[[:punct:]]", "", tolower(inp))
  words <- character(0)
  if (length(cleaned) > 0) {
    # Split on any run of whitespace so repeated/leading/trailing blanks do not
    # produce empty "words" (and therefore unmatchable prefixes like "a__b_").
    words <- strsplit(trimws(cleaned[1]), "[[:space:]]+")[[1]]
    words <- words[!is.na(words) & nzchar(words)]
  }

  # Look up the last `n_context` words in `ngrams` and return the first three
  # candidates with their scaled probabilities, or NULL when this level cannot
  # be used (too few input words, or two or fewer matching n-grams).
  top_three <- function(ngrams, n_context, scale) {
    if (length(words) < n_context) {
      return(NULL)
    }
    prefix <- paste0(paste(utils::tail(words, n_context), collapse = "_"), "_")
    hits <- which(startsWith(as.character(ngrams$feature), prefix))
    # At least three candidates are required, otherwise back off.
    if (length(hits) <= 2) {
      return(NULL)
    }
    matched <- ngrams[hits, ]
    # Share of each candidate among all matches, scaled to a percentage.
    matched$probability_percent <-
      round(((matched$frequency / sum(matched$frequency)) * scale), 2)
    best <- matched[seq_len(3), ]
    # The predicted word is the token right after the context words.
    best$target <- vapply(
      strsplit(as.character(best$feature), "_"),
      function(parts) parts[[n_context + 1]],
      character(1)
    )
    best[, c("target", "probability_percent")]
  }

  prediction <- top_three(data4, 3, 100)
  if (is.null(prediction)) {
    prediction <- top_three(data3, 2, mutation)
  }
  if (is.null(prediction)) {
    prediction <- top_three(data2, 1, mutation * mutation / 100)
  }
  if (is.null(prediction)) {
    # Nothing matched at any level: fall back to fixed common words.
    prediction <- data.frame(
      target = c("the", "a", "to"),
      probability_percent = c(1, 1, 1)
    )
  }

  # print() shows the table and hands it back invisibly as the return value.
  print(prediction)
}
Find the app here: https://deadpool.shinyapps.io/Predictograph/