library(quanteda)
library(quanteda.textmodels)
library(quanteda.textplots)
library(quanteda.textstats)
library(qdapDictionaries)
library(RColorBrewer)
library(reshape2)
library(dplyr)
library(data.table)
# Append a timestamped message to a log file.
#   X: message text to record.
#   Y: value handed to cat() as its `file` argument (the log destination).
# Each entry has the form "<timestamp>-- <message>" and is written with
# append = TRUE, so existing log content is kept.
printToLog <- function(X, Y) {
  stamp <- format(Sys.time(), "%a %b %d %X %Y")
  entry <- paste(stamp, X, sep = "-- ")
  cat(entry, file = Y, sep = "\n", append = TRUE)
}
# Clean a quanteda tokens object: remove unwanted tokens, then expand
# chat-style abbreviations into ordinary words.
#   myToken: tokens object to clean.
# Returns the cleaned tokens object.
# The removal patterns fBufBadWords, fBufBadWords.2, remove.list and
# special.chars are not parameters; they are looked up in the enclosing
# environment and must exist before this function is called.
cleanUpToken <- function(myToken) {
  # Removal passes; every pass is made with padding = TRUE.
  cleaned <- tokens_select(myToken, pattern = fBufBadWords, selection = "remove",
                           valuetype = "fixed", padding = TRUE)
  cleaned <- tokens_select(cleaned, pattern = fBufBadWords.2, selection = "remove",
                           valuetype = "regex", padding = TRUE)
  cleaned <- tokens_select(cleaned, remove.list, selection = "remove",
                           valuetype = "fixed", padding = TRUE)
  cleaned <- tokens_select(cleaned, special.chars, selection = "remove",
                           valuetype = "regex", padding = TRUE)

  # Replacement rules as (pattern, replacement, valuetype), applied one at a
  # time in exactly this order.
  rewrites <- list(
    c("rt", "right", "fixed"),
    c("lol", "laugh", "fixed"),
    c("im", "i'm", "fixed"),
    c("ur", "your", "fixed"),
    c("wanna", "want", "fixed"),
    c("omg", "oh", "fixed"),
    c("bro", "friend", "fixed"),
    c("yo", "nice", "fixed"),
    c("thx", "thanks", "fixed"),
    c("ppl", "people", "fixed"),
    c("haha", "funny", "fixed"),
    c("^(ha)+$", "funny", "regex"),
    c("lmao", "funny", "fixed"),
    c("lmfao", "funny", "fixed"),
    c("smh", "shocked", "fixed"),
    c("dm", "contact", "fixed"),
    c("cuz", "because", "fixed"),
    c("aint", "isn't", "fixed"),
    c("idk", "unsure", "fixed"),
    c("nite", "night", "fixed"),
    c("+", "and", "fixed"),
    c("y'all", "everyone", "fixed"),
    c("yr", "year", "fixed"),
    c("gettin", "getting", "fixed"),
    c("gotta", "should", "fixed")
  )
  for (rule in rewrites) {
    cleaned <- tokens_replace(cleaned, rule[1], rule[2], valuetype = rule[3])
  }
  return(cleaned)
}
# Seed R's random number generator so that later random draws are repeatable.
set.seed(12345)
# Read a text file and turn it into a sentence-level quanteda corpus.
#   fileName: path of the text file; every line read becomes one document.
#   doctag:   prefix for the document names, built as "<doctag>_<line number>".
# Returns the corpus after corpus_reshape(to = "sentences").
generateCorpus <- function(fileName, doctag)
{
  con <- file(fileName, "rb")
  # Release the connection even when readLines() stops with an error.
  on.exit(close(con), add = TRUE)
  fBuf <- readLines(con, skipNul = TRUE)
  fText <- corpus(fBuf, docnames = paste0(doctag, "_", seq_along(fBuf)))
  fText <- corpus_reshape(fText, to = "sentences")
  return(fText)
}
# Build one sentence-level corpus per source file and merge them.
myBlogCorpus <- generateCorpus("./final/en_US/en_US.blogs.txt","blog")
myNewCorpus <- generateCorpus("./final/en_US/en_US.news.txt","news")
myTwitterCorpus <- generateCorpus("./final/en_US/en_US.twitter.txt","twitter")
myMergedCorpus <- c(myBlogCorpus,myNewCorpus,myTwitterCorpus)
# Deliberately set a different seed, in the hope that the sample drawn below
# is unseen data.
set.seed(555)
merged.corpus.length <- length(myMergedCorpus)
# Draw a random 1% of the merged documents.
mySampleCorpus <- myMergedCorpus[sample(merged.corpus.length,merged.corpus.length*0.01)]
myToken <- tokens(mySampleCorpus,remove_punct = TRUE)
# Dictionary check: TRUE for entries found in GradyAugmented or my.addon.words.
my.addon.words <- NULL
is.word <- function(x) x %in% c(GradyAugmented,my.addon.words)
# remove.list is read by cleanUpToken(). It collects the digits 0-9, every
# single letter except A/a and I/i, and TRUE/FALSE. Combining these with c()
# coerces everything to character, so the last two entries are the strings
# "TRUE" and "FALSE".
remove.list <- NULL
remove.list <- c(remove.list,0:9)
remove.list <- c(remove.list,LETTERS[!(LETTERS %in% c('A','I'))])
remove.list <- c(remove.list,letters[!(letters %in% c('a','i'))])
remove.list <- c(remove.list, TRUE, FALSE)
myToken <- myToken %>% cleanUpToken()
# Number of tokenised documents in the sample (printed below).
token.length <- length(myToken)
token.length
## [1] 80707
Although 6-grams, 5-grams, 4-grams and 3-grams are generated here, the validation exercise is conducted on the 5-grams: the first four words are the predictors and the fifth word is the target word.
# Build n-grams of order 6, 5, 4 and 3 from the cleaned tokens.
# NOTE(review): tokens_remove() is called without a valuetype, so "_{2,}" is
# presumably matched with quanteda's default pattern type rather than as a
# regular expression -- confirm that n-grams containing consecutive
# underscores are really dropped by these calls.
hGramToken <- tokens_ngrams(myToken, n = 6)
hGramToken <- hGramToken %>% tokens_remove("_{2,}")
pGramToken <- tokens_ngrams(myToken, n = 5)
pGramToken <- pGramToken %>% tokens_remove("_{2,}")
qGramToken <- tokens_ngrams(myToken, n = 4)
qGramToken <- qGramToken %>% tokens_remove("_{2,}")
tGramToken <- tokens_ngrams(myToken, n = 3)
tGramToken <- tGramToken %>% tokens_remove("_{2,}")
# One document-feature matrix per n-gram order.
dfmHG <- dfm(hGramToken)
dfmPG <- dfm(pGramToken)
dfmQG <- dfm(qGramToken)
dfmTG <- dfm(tGramToken)
# The column names of each matrix are the n-gram strings, with words joined by "_"
# (see the printed samples below).
hgNames <- colnames(dfmHG)
pgNames <- colnames(dfmPG)
qgNames <- colnames(dfmQG)
tgNames <- colnames(dfmTG)
hgNames.length <- length(hgNames)
pgNames.length <- length(pgNames)
qgNames.length <- length(qgNames)
tgNames.length <- length(tgNames)
# Print the first few n-grams of each order and the number of n-grams found.
head(hgNames)
## [1] "or_do_we_just_embrace_the"
## [2] "do_we_just_embrace_the_role"
## [3] "we_just_embrace_the_role_of"
## [4] "just_embrace_the_role_of_aesthetic"
## [5] "embrace_the_role_of_aesthetic_editing"
## [6] "the_role_of_aesthetic_editing_in"
head(pgNames)
## [1] "or_do_we_just_embrace" "do_we_just_embrace_the"
## [3] "we_just_embrace_the_role" "just_embrace_the_role_of"
## [5] "embrace_the_role_of_aesthetic" "the_role_of_aesthetic_editing"
head(qgNames)
## [1] "or_do_we_just" "do_we_just_embrace" "we_just_embrace_the"
## [4] "just_embrace_the_role" "embrace_the_role_of" "the_object_gets_dipped"
head(tgNames)
## [1] "or_do_we" "do_we_just" "we_just_embrace" "just_embrace_the"
## [5] "embrace_the_role" "the_object_gets"
pgNames.length
## [1] 598015
qgNames.length
## [1] 657067
tgNames.length
## [1] 635524
# Re-seed, then draw a random 1% of the n-gram names of each order.
set.seed(666)
hgSample <- hgNames[sample(hgNames.length,hgNames.length*0.01)]
pgSample <- pgNames[sample(pgNames.length,pgNames.length*0.01)]
qgSample <- qgNames[sample(qgNames.length,qgNames.length*0.01)]
tgSample <- tgNames[sample(tgNames.length,tgNames.length*0.01)]
pgSample.length <- length(pgSample)
qgSample.length <- length(qgSample)
tgSample.length <- length(tgSample)
# Print the first few sampled n-grams of each order and the sample sizes.
head(hgSample)
## [1] "of_stress_as_we_started_out"
## [2] "might_draw_an_offer_because_both"
## [3] "vision_for_the_promotion_and_marketing"
## [4] "find_a_way_to_capitalize_hitting"
## [5] "sketchbooks_which_i_always_alter_along"
## [6] "the_white_norfork_and_other_tailwaters"
head(pgSample)
## [1] "in_funding_the_trust_so" "legislation_recently_passed_in_the"
## [3] "stabilizes_otherwise_i_do_not" "have_to_endure_days_twice"
## [5] "day_and_he_cares_about" "eighth_grade_i_turned_out"
head(qgSample)
## [1] "of_things_by_placing" "rolls_around_it_can" "follow_me_please_i"
## [4] "practice_and_play_in" "waits_on_them_with" "was_not_me_who"
head(tgSample)
## [1] "can't_even_imagine" "you're_out_there" "her_open_mouth"
## [4] "since_the_age" "store_played_him" "wish_i_can"
pgSample.length
## [1] 5980
qgSample.length
## [1] 6570
tgSample.length
## [1] 6355
# Colour palettes from RColorBrewer.
rdylgn <- brewer.pal(n = 11, name = "RdYlGn")
greys <- brewer.pal(n = 9, name = "Greys")

# Saved n-gram tables, one per object named in the dfNames lookup used later
# (dfUG ... dfHG).
dfUG <- readRDS("./DSCapstone_TextPredictor/UG_Short.rds")
dfBG <- readRDS("./DSCapstone_TextPredictor/BG_Short.rds")
dfTG <- readRDS("./DSCapstone_TextPredictor/TG_Short.rds")
dfQG <- readRDS("./DSCapstone_TextPredictor/QG_Short.rds")
dfPG <- readRDS("./DSCapstone_TextPredictor/PG_Short.rds")
dfHG <- readRDS("./DSCapstone_TextPredictor/HG_Short.rds")

# Saved "CC" tables; searchKN() merges them in by `lastword` and reads their
# `contcount` column.
dfBG.CC <- readRDS("./DSCapstone_TextPredictor/BG_CC.rds")
dfTG.CC <- readRDS("./DSCapstone_TextPredictor/TG_CC.rds")
dfQG.CC <- readRDS("./DSCapstone_TextPredictor/QG_CC.rds")
dfPG.CC <- readRDS("./DSCapstone_TextPredictor/PG_CC.rds")
dfHG.CC <- readRDS("./DSCapstone_TextPredictor/HG_CC.rds")

# Extra stop words from a local file, combined with quanteda's English list.
conStop <- file("morestopwords.txt", open = "rb")
fBufStop <- readLines(conStop, skipNul = TRUE)
close(conStop)
fBufStop <- union(fBufStop, stopwords("en"))

# Short list of words that returnSimpleMatch() can mask as wildcards.
short.stop.words <- strsplit("my our your his her its their the a an and", split = " ")[[1]]
# Return the last n characters of each string in x.
#   x: character vector.
#   n: number of trailing characters to keep.
substrRight <- function(x, n) {
  last <- nchar(x)
  first <- last - n + 1
  substr(x, first, last)
}
# Report whether a lookup produced at least one row.
#   df: a data frame (or similar table), or NULL.
# Returns FALSE for NULL and for tables with zero rows, TRUE otherwise.
isMatchFound <- function(df) {
  if (is.null(df)) {
    return(FALSE)
  }
  if (nrow(df) > 0) TRUE else FALSE
}
# Look up the words that follow a prefix in one n-gram table and turn their
# counts into relative frequencies.
#   search.prefix:    context words joined by "_"; a word masked by the caller
#                     appears as the literal text \w+.
#   dfNames:          character vector of n-gram table object names.
#   dfIndex:          position in dfNames of the table to search; also used to
#                     name the probability column ("p<dfIndex>").
#   short.stop.words: words that a masked position is allowed to match.
#   maskStopWords:    when TRUE, completions that are English stop words are
#                     dropped before the frequencies are computed.
# Returns a data frame with columns `word` and `p<dfIndex>`, sorted by
# decreasing frequency and limited to the top 10 rows (possibly zero rows).
# The table is expected to expose `feature` (n-gram text) and `value` (count)
# columns and to support data.table's `[` filtering.
searchPatternInNG <- function(search.prefix, dfNames, dfIndex, short.stop.words, maskStopWords = TRUE)
{
  # Expand each masked word into an alternation of the short stop words. The
  # first gsub() consumes the backslash and the "w"; the second removes the
  # "+" that is left behind.
  stop.group <- paste0("(", paste(short.stop.words, collapse = "|"), ")")
  search.prefix <- gsub("\\\\w+", stop.group, search.prefix)
  search.prefix <- gsub("[+]", "", search.prefix)
  search.string <- paste0("^", search.prefix, "_[a-z']+$")

  # Fetch the table by name rather than building and evaluating source text.
  ngram.table <- get(dfNames[dfIndex])
  dfMatch <- as.data.frame(ngram.table[feature %like% search.string])

  # The completion is whatever follows the matched prefix.
  dfMatch$word <- gsub(paste0("^", search.prefix, "_"), "", dfMatch$feature)
  if (maskStopWords)
  {
    dfMatch <- dfMatch[!(dfMatch$word %in% stopwords("en")), ]
  }

  # Relative frequency among the remaining completions.
  denom <- sum(dfMatch$value)
  dfMatch$probs <- dfMatch$value / denom
  dfMatch <- dfMatch[c("word", "probs")]
  dfMatch <- dfMatch[order(-dfMatch$probs), ]
  colnames(dfMatch)[2] <- paste0("p", dfIndex)
  dfMatch <- head(dfMatch, 10)
  return(dfMatch)
}
# Predict the next word by combining matches from several n-gram orders.
#   mytext:           input phrase, words separated by single spaces.
#   dfNames:          character vector of n-gram table object names, passed
#                     on to searchPatternInNG().
#   short.stop.words: words to mask as wildcards when maskStopWords is TRUE.
#   maskStopWords:    passed on to searchPatternInNG(); also enables masking.
# Only the last five words of the input are used. The context is searched in
# full and then with the leading word removed, one word at a time. Each
# candidate's score is the sum of its per-order frequencies weighted by the
# n-gram order, divided by the sum of the orders.
# Returns NULL when nothing matches, otherwise a data frame of up to five rows
# with columns word, p<k>..p2, w<k>..w2, score and Rank (k = number of context
# words + 1).
returnSimpleMatch <- function(mytext, dfNames, short.stop.words = "", maskStopWords = TRUE)
{
  nGramText <- unlist(strsplit(mytext, " "))
  # Empty strings (for example from repeated spaces) are not words.
  nGramText <- nGramText[nzchar(nGramText)]
  if (maskStopWords)
  {
    nGramText[nGramText %in% short.stop.words] <- "\\w+"
  }
  if (length(nGramText) > 5)
  {
    nGramText <- tail(nGramText, 5)
  }
  n.words <- length(nGramText)
  max.dfIndex <- n.words + 1

  # Back off by dropping the leading word. The shorter context is rebuilt from
  # the word vector instead of being stripped off the joined string, so masked
  # words and words containing hyphens, dots or digits are removed correctly.
  dfMatchMain <- NULL
  for (first in seq_len(n.words))
  {
    dfIndex <- n.words - first + 2
    search.prefix <- paste(nGramText[first:n.words], collapse = "_")
    dfMatchTemp <- searchPatternInNG(search.prefix, dfNames, dfIndex, short.stop.words, maskStopWords)
    if (isMatchFound(dfMatchTemp))
    {
      if (is.null(dfMatchMain))
      {
        dfMatchMain <- dfMatchTemp
      }
      else
      {
        dfMatchMain <- full_join(dfMatchMain, dfMatchTemp, by = "word")
      }
    }
  }
  if (is.null(dfMatchMain)) return(NULL)

  orders <- max.dfIndex:2
  n.rows <- nrow(dfMatchMain)
  score <- rep(0, n.rows)
  for (i in orders)
  {
    p.col <- paste0("p", i)
    # Orders that produced no match still get a (missing) column.
    if (!(p.col %in% colnames(dfMatchMain)))
    {
      dfMatchMain[[p.col]] <- rep(NA, n.rows)
    }
    # The weight of an order is the order itself.
    dfMatchMain[[paste0("w", i)]] <- rep(i, n.rows)
    score <- score + coalesce(dfMatchMain[[p.col]], 0) * i
  }
  dfMatchMain$score <- score / sum(orders)

  dfMatchMain <- dfMatchMain[order(-dfMatchMain$score), ]
  dfMatchMain <- head(dfMatchMain, 5)
  rownames(dfMatchMain) <- seq_len(nrow(dfMatchMain))
  dfMatchMain$Rank <- seq_len(nrow(dfMatchMain))
  dfMatchMain <- dfMatchMain[c("word", paste0("p", orders), paste0("w", orders), "score", "Rank")]
  return(dfMatchMain)
}
# Discount subtracted from a count.
#   value:        vector of counts.
#   dfIndex:      n-gram order currently being processed.
#   max.df.Index: highest n-gram order in use.
# Returns a single 0 when dfIndex has reached max.df.Index; otherwise a vector
# holding 0.554 where the count equals 1 and 0.75 elsewhere.
getdValue <- function(value, dfIndex, max.df.Index) {
  if (dfIndex < max.df.Index) {
    return(ifelse(value == 1, 0.554, 0.75))
  }
  0
}
# Recursive discounted-probability lookup for the words following a prefix.
# (Presumably a Kneser-Ney-style estimate, judging by the "KN" name, the
# discounts and the continuation counts -- confirm.)
#   search.prefix: context words joined by "_".
#   dfNames:       character vector of n-gram table object names.
#   dfCCNames:     character vector of "CC" table object names, indexed like dfNames.
#   dfIndex:       position in dfNames / n-gram order handled by this call.
#   max.df.Index:  highest order in use; at that order getdValue() returns 0.
#   rLevel:        recursion depth; 0 for the initial call.
#   dfMatch:       candidate table built by the level above (unused when rLevel == 0).
#   maskStopWords: when TRUE, completions that are English stop words are dropped.
# Returns NULL when the initial lookup matches nothing, otherwise a table with
# columns feature, lastword, word, ngram, value and p.
searchKN <- function(search.prefix,dfNames,dfCCNames, dfIndex, max.df.Index, rLevel, dfMatch, maskStopWords=TRUE)
{
if (rLevel == 0)
{
# Initial call: collect every n-gram in table dfNames[dfIndex] made of the
# prefix followed by exactly one more [a-z'] word.
search.string <- paste0("^",search.prefix,"_[a-z']+$")
mytext <- paste0("dfMatch <- ", dfNames[dfIndex], "[feature %like% search.string]")
eval(parse(text=mytext))
if (nrow(dfMatch) == 0) return(NULL)
dfMatch$word <- gsub(paste0("^",search.prefix,"_"),"",dfMatch$feature)
if (maskStopWords)
{
dfMatch <- dfMatch[!(dfMatch$word %in% stopwords("en")),]
}
# Discounted relative frequency of each candidate, plus the weight (lambda)
# given to the lower-order estimate.
denom <- sum(dfMatch$value)
dValue <- getdValue(dfMatch$value, dfIndex, max.df.Index)
p <- ifelse(dfMatch$value - dValue>0,dfMatch$value - dValue,0) / denom
lambda <- dValue * nrow(dfMatch) / denom
# lastword starts as the full n-gram; each deeper level strips one leading word.
dfMatch$lastword <- dfMatch$feature
dfMatch$ngram <- rep(dfIndex,nrow(dfMatch))
dfMatch$word <- gsub("^([a-z']+_)+","",dfMatch$feature)
dfMatch <- dfMatch[,.(feature,lastword,word,ngram,value)]
# Add the lower-order estimate; the recursive result's p is combined by row position.
dfMatch$p <- p + lambda * searchKN(gsub("^[a-z']+_?","",search.prefix)
, dfNames, dfCCNames, dfIndex-1,
max.df.Index, rLevel+1, dfMatch,maskStopWords)$p
}
else
{
# Deeper level: shorten each candidate by one leading word and look up its
# contcount in the CC table of the next-higher order.
dfMatch$lastword <- gsub("^[a-z']+_","",dfMatch$lastword)
# NOTE(review): merge() may reorder rows (data.table's merge sorts on the key
# by default -- confirm), while the caller adds the returned p by position.
mytext <- paste0("dfMatch <- merge(dfMatch,", dfCCNames[dfIndex+1], ",by='lastword',all.x=TRUE)")
eval(parse(text=mytext))
# Candidates without an entry in the CC table get a count of 0.
dfMatch$contcount <- coalesce(dfMatch$contcount,0)
# NOTE(review): this row-count denominator is overwritten two statements
# below, so it has no effect on the result.
mytext <- paste0("denom <- nrow(", dfNames[dfIndex+1], ")")
eval(parse(text=mytext))
# Use the summed counts of the current candidates as the denominator instead
# of the table size (the original note says the latter leaves hardly any
# probability); fall back to 1 to avoid dividing by zero.
denom <- sum(dfMatch$contcount)
if (denom == 0) denom <- 1
dValue <- getdValue(dfMatch$contcount, dfIndex, max.df.Index)
p <- ifelse(dfMatch$contcount - dValue>0,dfMatch$contcount - dValue,0) / denom
lambda <- dValue * nrow(dfMatch) / denom
# Drop the merged-in columns so that every level returns the same layout.
dfMatch <- dfMatch[,.(feature,lastword,word,ngram,value)]
if (dfIndex == 1)
{
# Lowest order: the recursion stops here.
dfMatch$p <- p
}
else
{
dfMatch$p <- p + lambda * searchKN(gsub("^[a-z']+_?","",search.prefix)
, dfNames, dfCCNames, dfIndex-1,
max.df.Index, rLevel+1, dfMatch,maskStopWords)$p
}
}
return(dfMatch)
}
# Predict the next word with searchKN(), backing off to shorter contexts.
#   mytext:        input phrase, words separated by single spaces.
#   dfNames:       character vector of n-gram table object names.
#   dfCCNames:     character vector of "CC" table object names.
#   max.df.Index:  highest n-gram order to use; only the last
#                  (max.df.Index - 1) words of the input are kept.
#   maskStopWords: passed on to searchKN().
# The full context is tried first, then the context with its leading word
# removed, and so on. Candidates found with a longer context take precedence
# over the same word found with a shorter one.
# Returns NULL when nothing matches, otherwise a table of up to five rows with
# columns word, ngram, p and Rank, ordered by decreasing p.
getKNDF <- function(mytext, dfNames, dfCCNames, max.df.Index, maskStopWords = TRUE)
{
  nGramText <- unlist(strsplit(mytext, " "))
  # Empty strings (for example from repeated spaces) are not words.
  nGramText <- nGramText[nzchar(nGramText)]
  if (length(nGramText) + 1 > max.df.Index)
  {
    nGramText <- tail(nGramText, max.df.Index - 1)
  }
  n.words <- length(nGramText)

  # Back off by dropping the leading word. The shorter context is rebuilt from
  # the word vector instead of being stripped off the joined string, so words
  # containing hyphens, dots or digits are removed correctly.
  dfMatchMain <- NULL
  for (first in seq_len(n.words))
  {
    dfIndex <- n.words - first + 2
    search.prefix <- paste(nGramText[first:n.words], collapse = "_")
    dfMatchTemp <- searchKN(search.prefix, dfNames, dfCCNames, dfIndex, max.df.Index, 0, NULL, maskStopWords)
    if (!isMatchFound(dfMatchTemp))
    {
      next
    }
    # The predicted word is the text after the last underscore of the n-gram.
    dfMatchTemp$word <- sub("^.*_", "", dfMatchTemp$feature)
    if (is.null(dfMatchMain))
    {
      dfMatchMain <- dfMatchTemp
    }
    else
    {
      # Keep only words not already supplied by a longer context.
      dfMatchTemp <- dfMatchTemp[!(word %in% dfMatchMain$word)]
      dfMatchMain <- rbind(dfMatchMain, dfMatchTemp)
    }
  }
  if (!isMatchFound(dfMatchMain)) return(NULL)

  dfMatchMain <- dfMatchMain[, .(word, ngram, p)]
  dfMatchMain <- dfMatchMain[order(-p)]
  dfMatchMain <- head(dfMatchMain, 5)
  dfMatchMain$Rank <- seq_len(nrow(dfMatchMain))
  return(dfMatchMain)
}
# ---- Validation of the two predictors on sampled 5-grams ----
# Lookup vectors: position k names the table of order k. dfCCNames has no
# entry for order 1.
dfNames <- c("dfUG","dfBG","dfTG","dfQG","dfPG","dfHG")
dfCCNames <- c("","dfBG.CC","dfTG.CC","dfQG.CC","dfPG.CC","dfHG.CC")

# Validate on 1000 of the sampled 5-grams.
pgSample.short <- pgSample[sample(length(pgSample),1000)]
length.short <- length(pgSample.short)

# One row per phrase. ip_rank<k> / kn_rank<k> is set to 1 when the target word
# is returned at rank k by the interpolation / KN predictor; stopword is set
# to 1 when the target word is an English stop word.
df.CV <- data.frame(phrase = pgSample.short,
                    ip_rank1 = rep(0,length.short),
                    ip_rank2 = rep(0,length.short),
                    ip_rank3 = rep(0,length.short),
                    ip_rank4 = rep(0,length.short),
                    ip_rank5 = rep(0,length.short),
                    kn_rank1 = rep(0,length.short),
                    kn_rank2 = rep(0,length.short),
                    kn_rank3 = rep(0,length.short),
                    kn_rank4 = rep(0,length.short),
                    kn_rank5 = rep(0,length.short),
                    stopword = rep(0,length.short)
)

# Pass 1: interpolation predictor (timed separately from the KN pass).
oldTime <- as.numeric(Sys.time())*1000
for (mytext in pgSample.short)
{
  # Split on the LAST underscore: the final word is the target and everything
  # before it is the input. This also works when a word contains characters
  # outside [a-z'] (hyphens, dots, digits).
  targetword <- sub("^.*_", "", mytext)
  inputtext <- gsub("_", " ", sub("_[^_]*$", "", mytext))
  # Stop words are only allowed as predictions when the target is one.
  allowStopWords <- FALSE
  if (targetword %in% stopwords("en"))
  {
    allowStopWords <- TRUE
    df.CV[df.CV$phrase == mytext,"stopword"] <- 1
  }
  df.IP <- returnSimpleMatch(inputtext,dfNames,maskStopWords = !allowStopWords)
  if (isMatchFound(df.IP))
  {
    df.Match <- df.IP[df.IP$word == targetword,]
    ip.rank <- 0
    if (nrow(df.Match) > 0)
    {
      ip.rank <- df.Match$Rank
      df.CV[df.CV$phrase == mytext,paste0("ip_rank",ip.rank)] <- 1
    }
  }
}
newTime <- as.numeric(Sys.time())*1000

# Pass 2: KN predictor, using tables up to order 6.
oldTime.KN <- as.numeric(Sys.time())*1000
for (mytext in pgSample.short)
{
  targetword <- sub("^.*_", "", mytext)
  inputtext <- gsub("_", " ", sub("_[^_]*$", "", mytext))
  allowStopWords <- FALSE
  if (targetword %in% stopwords("en"))
  {
    allowStopWords <- TRUE
    df.CV[df.CV$phrase == mytext,"stopword"] <- 1
  }
  df.KN <- getKNDF(inputtext,dfNames,dfCCNames,6,maskStopWords = !allowStopWords)
  if (isMatchFound(df.KN))
  {
    df.Match <- df.KN[df.KN$word == targetword,]
    kn.rank <- 0
    if (nrow(df.Match) > 0)
    {
      kn.rank <- df.Match$Rank
      df.CV[df.CV$phrase == mytext,paste0("kn_rank",kn.rank)] <- 1
    }
  }
}
newTime.KN <- as.numeric(Sys.time())*1000

# Summaries: hits per rank, average milliseconds per phrase, and accuracy
# (share of phrases whose target appeared anywhere in the top five).
length.CV <- nrow(df.CV)
sum.rank1 <- sum(df.CV$ip_rank1)
sum.rank2 <- sum(df.CV$ip_rank2)
sum.rank3 <- sum(df.CV$ip_rank3)
sum.rank4 <- sum(df.CV$ip_rank4)
sum.rank5 <- sum(df.CV$ip_rank5)
sum.kn.rank1 <- sum(df.CV$kn_rank1)
sum.kn.rank2 <- sum(df.CV$kn_rank2)
sum.kn.rank3 <- sum(df.CV$kn_rank3)
sum.kn.rank4 <- sum(df.CV$kn_rank4)
sum.kn.rank5 <- sum(df.CV$kn_rank5)
time.taken <- (newTime - oldTime) / length.CV
total.catches <- (sum.rank1 + sum.rank2 + sum.rank3 + sum.rank4 + sum.rank5)
accuracy <- total.catches / length.CV
time.taken.kn <- (newTime.KN - oldTime.KN) / length.CV
total.catches.kn <- (sum.kn.rank1 + sum.kn.rank2 + sum.kn.rank3 + sum.kn.rank4 + sum.kn.rank5)
accuracy.kn <- total.catches.kn / length.CV

# Per-phrase hit flags for each predictor, printed as a table.
df.CV$ip_catch <- rowSums(df.CV[c(paste0("ip_rank",1:5))])
df.CV$kn_catch <- rowSums(df.CV[c(paste0("kn_rank",1:5))])
df.CV[c("phrase","ip_catch","kn_catch")]
## phrase ip_catch kn_catch
## 1 from_the_university_of_colorado 0 0
## 2 wife_has_lingered_over_the 1 1
## 3 some_might_have_actually_happened 0 0
## 4 you_go_workout_in_khakis 0 0
## 5 of_ideas_and_farce_and 1 1
## 6 one_more_task_tuesday_cutting 0 0
## 7 school_has_spoken_to_him 1 1
## 8 in_writing_almost_exclusively_about 0 0
## 9 impediments_at_the_time_of 1 1
## 10 by_the_red_beet_stains 0 0
## 11 academy_award_winning_film_real 0 0
## 12 well_i_hope_things_get 1 1
## 13 issue_with_the_fool_that 0 0
## 14 for_several_more_hours_you 0 0
## 15 you_guys_had_an_awesome 1 1
## 16 may_be_a_gambit_based 0 0
## 17 show_you_know_that_episode 0 0
## 18 for_historical_shows_with_great 0 0
## 19 is_probably_the_biggest_party 0 0
## 20 tablet_makes_sense_for_limiting 0 0
## 21 people_call_for_the_bitter 0 0
## 22 then_picked_up_a_battered 0 0
## 23 done_such_a_miraculous_thing 0 0
## 24 chairman_of_regional_urology_at 0 0
## 25 college_game_is_the_one-and-done 0 0
## 26 energy_of_an_impact_and 0 0
## 27 northeastern_congo_two_prominent_militias 0 0
## 28 gallery_aferro_suzanne_kammen_and 0 0
## 29 again_i_don't_know_the 0 0
## 30 first_home_run_hit_by 1 1
## 31 worked_while_they_won_two 1 1
## 32 more_vulnerable_to_simple_things 1 1
## 33 with_pakistan's_nuclear_arsenal_as 0 0
## 34 join_the_mountain_west_conference 1 1
## 35 i_cannot_express_ow_much 0 0
## 36 nation_in_york_and_naperville 0 0
## 37 friday_ladies_free_all_night 1 1
## 38 will_be_a_washing_of 0 0
## 39 declined_even_further_when_schools 0 0
## 40 i_am_studying_for_the 1 1
## 41 goes_hiking_with_me_too 0 0
## 42 i_think_we'll_be_able 1 1
## 43 meaning_students_had_to_wait 0 0
## 44 is_a_lot_of_cussing 0 0
## 45 from_each_side_this_was 0 0
## 46 to_make_reservations_contact_cynthia 0 0
## 47 have_been_the_only_person 1 1
## 48 apron-strings_with_a_constantly-expanding_bloated 0 0
## 49 you_doing_to_my_chicken 0 0
## 50 who_questioned_the_use_of 1 1
## 51 frames_bookcases_and_my_sleep 0 0
## 52 she_just_had_an_argument 0 0
## 53 which_to_hold_back_and 1 1
## 54 one_woman_asked_the_senator 0 0
## 55 there_that_read_listen_to 1 1
## 56 ranking_state_and_federal_officials 1 0
## 57 that_you_have_with_tea 0 0
## 58 order_nearly_every_table_is 0 0
## 59 card_or_project_and_link 0 0
## 60 lysol_can_to_clean_a 0 0
## 61 all_wide_awake_and_excited 0 0
## 62 class_take_you_to_lunch 0 0
## 63 may_we_draw_nearer_to 1 1
## 64 reason_why_i've_played_for 0 0
## 65 working_at_the_salvation_army 1 1
## 66 approach_would_be_to_have 1 1
## 67 landlord's_refusal_to_renew_your 1 0
## 68 a_difference_in_the_life 1 1
## 69 this_way_tastes_great_just 0 0
## 70 quite_primitive_especially_as_compared 0 0
## 71 at_an_ngo_most_of 1 1
## 72 to_throw_a_right_hand 1 1
## 73 train_in_time_to_react 0 0
## 74 but_one_dynamo_in_the 1 1
## 75 out_of_their_own_player's 0 0
## 76 wyoming_will_be_seen_on 1 0
## 77 says_analyst_egil_juliussen_of 0 0
## 78 update_session_later_this_month 1 1
## 79 pretty_far-reaching_law_for_employers 0 0
## 80 princess_is_great_for_this 0 0
## 81 a_basic_understanding_of_what 1 1
## 82 can_stop_calling_me_time 0 0
## 83 users_feel_about_android_having 0 0
## 84 deep_and_abiding_interest_in 1 1
## 85 responsibility_at_the_lower_levels 0 0
## 86 across_from_meldrum_bar_state 0 0
## 87 you_for_following_me_my 0 0
## 88 wandering_consisted_perhaps_of_a 1 1
## 89 think_negative_sugar_ray_robinson 0 0
## 90 west_they_establish_the_first 1 1
## 91 very_last_page_of_the 1 1
## 92 a_line_test_on_your 1 1
## 93 again_and_enjoyed_big-race_success 0 0
## 94 late_i'm_working_on_getting 1 1
## 95 night_to_even_their_first-round 0 0
## 96 think_i_just_saw_someone 1 1
## 97 opens_first_hotel_in_pearl 0 0
## 98 love_the_name_of_the 1 1
## 99 water_pipes_using_new_technology 0 0
## 100 the_commercial_area_and_just 0 0
## 101 i_can_try_but_i 1 1
## 102 brotha_i_wish_i_could 1 1
## 103 stack_of_papers_leading_up 1 1
## 104 and_admitted_that_i_must 0 0
## 105 we_need_money_you_have 1 1
## 106 in_tobacco_smoke_amplified_the 0 0
## 107 by_sean_brand_atri_and 0 0
## 108 with_their_finance_and_intervention 0 0
## 109 pathetic_although_genuine_feelings_for 1 1
## 110 see_some_good_friends_i 0 0
## 111 more_fleets_are_moving_south 0 0
## 112 football_coach_jerry_sandusky_received 0 0
## 113 the_long-term_survivability_of_an 0 0
## 114 to_win_tonight_instead_of 1 1
## 115 new_pic's_up_guys_and 1 1
## 116 anyone_with_fibromyalgia_please_send 0 0
## 117 reason_for_demanding_recall_of 1 1
## 118 comparable_but_the_profitability_of 1 1
## 119 and_on_the_laws_of 1 1
## 120 fielder_singled_up_the_middle 0 1
## 121 ground_staff_and_cabin_crew 0 0
## 122 and_enough_fruity_snacks_to 0 1
## 123 her_may_be_we_should 0 0
## 124 bone_marrow_paint_flows_thick 0 0
## 125 into_an_upper_elementary_classroom 0 0
## 126 will_offer_a_family-friendly_pg-rated 0 0
## 127 funny_one_pairing_i_like 0 0
## 128 get_up_early_things_get 1 1
## 129 a_conference_hosted_by_educational 0 0
## 130 ok_let's_set_up_a 1 1
## 131 delfino_is_an_unknown_but 0 0
## 132 read_gift_wrap_a_book 0 0
## 133 wait_to_cross_the_finish 0 0
## 134 want_to_hear_from_ach 0 0
## 135 older_it_is_a_privilege 0 0
## 136 games_this_summer_she_said 1 1
## 137 on_the_individual_pictured_above 1 1
## 138 to_spend_his_time_behind 0 0
## 139 so_i_wont_be_posting 0 0
## 140 westfield_designed_a_landscape_plan 0 0
## 141 is_the_thing_that_i 1 1
## 142 a_less-than-reliable_partner_in_the 0 0
## 143 wittmer_dubus_chowdhury_fehr_hartman 0 0
## 144 romania_the_caribbean_all_combining 0 0
## 145 your_turn_because_you_were 0 0
## 146 into_words_how_incredible_i 0 0
## 147 creative_and_pr_teams_wrap 0 0
## 148 insiders_ever_saw_in_wartime 0 0
## 149 percentage_for_the_salukis_who 0 0
## 150 make_such_bad_decisions_on 1 1
## 151 is_also_a_good_friend 0 0
## 152 i_loved_taking_those_step 0 0
## 153 fashion_brigade_although_i_am 1 1
## 154 crafting_and_most_days_my 0 0
## 155 therefore_i_initially_questioned_my 0 0
## 156 insurance_companies_were_wary_of 1 1
## 157 my_trips_downtown_as_i 1 1
## 158 in_you_and_spewing_on 0 0
## 159 when_the_news_was_broken 0 0
## 160 if_water_had_a_facebook 0 0
## 161 in_our_first_season_represent 0 0
## 162 be_aware_of_how_much 1 1
## 163 prurient_interests_and_being_offensive 0 0
## 164 the_u.s_economy_may_not 0 0
## 165 a_season_and_a_half 1 1
## 166 and_lucy_hollier_on_viola 0 0
## 167 the_four_series_we've_seen 1 1
## 168 to_add_more_would_be 1 1
## 169 passport_then_you_need_to 1 1
## 170 were_credit_card_fees_levied 0 0
## 171 whose_domestic_revenues_already_were 0 0
## 172 bed_frames_bookcases_and_my 0 0
## 173 it_i_don't_recall_getting 0 0
## 174 to_the_top_of_the 1 1
## 175 farner_but_cant_ya_just 0 0
## 176 has_been_close_to_impossible 0 0
## 177 in_awe_at_what_was 0 0
## 178 with_a_credit_card_belonging 0 0
## 179 and_i_get_along_just 1 1
## 180 others_however_unconscious_of_the 1 1
## 181 indicate_such_a_zone_is 0 0
## 182 me_maybe_i_would_smile 0 0
## 183 i'm_getting_back_up_there 0 0
## 184 participation_in_a_dangerous_office 0 0
## 185 little_work_the_majority_of 1 1
## 186 they_wasnt_meant_to_be 1 1
## 187 because_bug_and_mr_a 0 0
## 188 that_is_not_the_wait 0 0
## 189 our_separate_work_life_and 1 1
## 190 of_singing_and_moving_and 0 0
## 191 oh_well_then_you_know 1 1
## 192 were_recorded_in_aztec_codices 0 0
## 193 the_frame_i_used_on 0 0
## 194 in_chicago_with_the_family 0 0
## 195 she_could_stand_on_a 1 1
## 196 implies_this_type_of_water 0 0
## 197 store_was_doing_booming_business 0 0
## 198 was_no_evidence_dalal_had 0 0
## 199 by_that_point_most_of 1 1
## 200 relatively_rarely_instituted_on_preservice 0 0
## 201 or_parochial_schools_but_could 0 0
## 202 and_a_brother_tim_cuddeback 0 0
## 203 goodman_went_about_following_his 0 0
## 204 transit_dream_won't_go_down 1 1
## 205 a_good_rub_down_so 0 0
## 206 el_pollo_norteno_in_garden 0 0
## 207 to_get_out_there_try 0 0
## 208 but_i_thought_it_would 1 1
## 209 the_bailouts_give_greece_enough 0 0
## 210 helping_a_guest_at_my 0 0
## 211 has_to_use_it_for 1 1
## 212 stick_and_you_just_brushed 0 0
## 213 always_sided_with_the_vaccine 0 0
## 214 story_and_some_viewers_will 0 0
## 215 while_you_are_cooking_the 1 1
## 216 should_something_happen_to_the 1 1
## 217 i_came_back_with_a 1 1
## 218 university_programs_classes_are_taught 0 0
## 219 take_care_of_yourself_sir 0 0
## 220 it_would_go_down_as 1 1
## 221 they_wanted_to_make_sure 1 1
## 222 encourage_their_peers_to_examine 0 0
## 223 it's_hard_when_i_miss 0 0
## 224 lips_and_smiles_but_i 1 1
## 225 to_link_past_and_present 1 1
## 226 i_am_trying_to_find 0 0
## 227 quite_sure_when_exactly_we'll 0 0
## 228 coffee_drink_just_say_the 1 1
## 229 and_says_they_should_beat 0 0
## 230 doubt_junior_seau_took_his 0 0
## 231 is_a_noticeable_hop_flavor 1 1
## 232 whether_it_was_the_business 0 0
## 233 if_i_see_one_more 1 1
## 234 week_and_a_technology_bellwether 0 0
## 235 service_urged_continued_caution_on 0 0
## 236 ahh_moment_so_now_it 0 0
## 237 and_so_today_is_just 1 1
## 238 howard's_jewelry_in_mayfield_heights 0 0
## 239 oatmeal_stout_from_squatters-_and 0 0
## 240 a_tree_and_drooping_its 0 0
## 241 donovan_all_appeared_in_promotional 0 0
## 242 in_primaries_for_council_and 1 1
## 243 shock_of_what_beyonce_wore 0 0
## 244 shouldn't_be_allowed_to_not 0 0
## 245 his_intended_actual_audience_it 0 0
## 246 favorite_thrashers_our_beloved_historic 0 0
## 247 of_generating_printed_engravings_and 0 0
## 248 have_worked_on_several_categories 0 0
## 249 or_a_class_so_too 0 0
## 250 so_wrong_but_i_cant 0 0
## 251 october_mr_weathers_invited_occupy 0 0
## 252 modified_gladiator_meets_the_shootie 0 0
## 253 notes_concerning_water_supply_article 0 0
## 254 people_deeply_invested_in_their 0 0
## 255 rejoins_the_mix_at_some 0 0
## 256 the_right_meds_for_him 0 0
## 257 to_show_them_why_they 1 1
## 258 two_women_wreaking_damage_on 0 0
## 259 be_more_directly_felt_in 0 0
## 260 open_so_it_appears_that 1 1
## 261 little_disappointed_in_the_lack 0 0
## 262 when_you_have_someone_who 1 1
## 263 the_draft_but_i_managed 0 0
## 264 then_put_the_marinade_on 0 0
## 265 whom_london_is_one_giant 0 0
## 266 i_miss_your_twitter_sprees 0 0
## 267 with_bare_legs_come_summer 0 0
## 268 who_gave_valuable_radio_real 0 0
## 269 i_was_very_embarrassed_and 1 1
## 270 this_year's_concert_is_april 0 0
## 271 the_boy_has_been_found 0 0
## 272 the_indiependence_music_festival_in 1 1
## 273 this_month_went_by_now 1 1
## 274 legislature_and_skip_the_regular 0 0
## 275 renowned_creature_in_the_world 1 1
## 276 you_are_trying_to_change 0 0
## 277 enough_effort_to_climb_over 0 0
## 278 satisfy_any_of_the_purposes 0 0
## 279 couldn't_do_it_laugh_glad 0 0
## 280 animals_they_do_not_gang 0 0
## 281 to_a_nearby_section_of 1 1
## 282 forward_to_seeing_what_he 1 1
## 283 last_time_we_must_say 0 1
## 284 a_right_to_decline_to 1 1
## 285 will_follow_tonight_with_dates 0 0
## 286 can_wear_a_slim_fitting 0 0
## 287 done_something_that_has_put 0 0
## 288 and_then_sell_it_to 1 1
## 289 jones_was_ordered_to_pay 1 1
## 290 with_a_northern_ireland_setting 0 0
## 291 go_to_his_graduation_ceremony 1 1
## 292 being_seen_and_literally_calling 0 0
## 293 show_now_this_good_music 0 0
## 294 control_fragmented_content_with_each 0 0
## 295 the_legislature's_primarily_its_leadership's 0 0
## 296 by_a_staff_person_at 0 0
## 297 any_in_those_days_when 1 1
## 298 highways_can_afford_to_stay 0 0
## 299 quite_a_while_to_tell 0 0
## 300 pointed_out_conflicts_of_interest 1 1
## 301 because_of_the_links_to 1 1
## 302 do_not_be_afraid_to 1 1
## 303 at_the_hotel_but_declined 0 0
## 304 we_can_have_children_adoption 0 0
## 305 she's_probably_just_as_nervous 0 0
## 306 that_but_to_me_her 0 0
## 307 in_february_compared_with_january's 0 0
## 308 no_i_wasnt_hiding_it 0 0
## 309 during_the_project_included_underwater 0 0
## 310 calif_i_would_take_ya 0 0
## 311 any_of_it_should_have 1 1
## 312 that's_why_ohioans_elect_mayors 0 0
## 313 is_enough_room_in_the 1 1
## 314 space_to_separate_them_visually 0 0
## 315 work_as_a_writer_in 0 0
## 316 which_listed_it_for_sale 0 0
## 317 to_stay_a_separate_entity 0 0
## 318 it_is_to_a_woman 0 0
## 319 he_was_surprised_to_wake 0 0
## 320 according_to_thought_leaders_howard 0 0
## 321 on_i_have_alot_thing 0 0
## 322 i_had_the_bounce_has 0 0
## 323 punk_i_dnt_want_work 0 0
## 324 for_some_of_the_fun 0 0
## 325 filler_while_he_got_the 1 1
## 326 a_board_of_student_auditors 0 0
## 327 the_community_through_the_program 0 0
## 328 eternity_just_because_we_all 0 0
## 329 of_preference_i_think_this 0 0
## 330 ing_up_in_some_way 1 1
## 331 educated_consumer_you_can_make 1 1
## 332 website_dialidol.com_measured_the_phone 0 0
## 333 circuit_attorney_jennifer_joyce_emphasized 0 0
## 334 be_jailed_for_their_part 0 0
## 335 table_spoons_of_butter_to 0 0
## 336 glasses_three_glasses_and_not 0 0
## 337 the_departments_that_comprise_public 0 0
## 338 their_weight-loss_willpower_during_a 0 0
## 339 quick_playful_inspection_more_on 0 0
## 340 state_prison_behavior_unit_won 0 0
## 341 been_open_for_garden_week 0 0
## 342 keep_up_with_dion_waiters 0 0
## 343 a_student's_approach_to_the 1 1
## 344 just_finished_catching_up_on 1 1
## 345 tech_is_the_state's_largest 0 0
## 346 have_to_take_he_said 1 1
## 347 mana_would_love_to_connect 0 0
## 348 least_the_lobsters_get_to 1 1
## 349 the_bill_were_concerned_about 1 1
## 350 head_these_days_when_she 0 0
## 351 so_there_is_this_girl 0 0
## 352 my_brain_won't_stop_thinking 1 1
## 353 tuesday_morning_dropping_of_resumes 0 0
## 354 to_know_this_festival_also 0 0
## 355 annual_value_of_more_than 1 1
## 356 my_jersey_name_for_team 0 0
## 357 has_inspired_hundreds_of_thousands 1 1
## 358 date_on_my_gown_is 0 0
## 359 deposited_into_a_bank_account 1 1
## 360 youversion_has_hundreds_of_reading 0 0
## 361 trying_to_make_it_to 1 1
## 362 this_snowy_weather_i_think 1 1
## 363 lack_of_clarity_has_resulted 0 0
## 364 for_one_bride_may_be 1 1
## 365 take_everything_back_offline_personalization 0 0
## 366 a_chance_to_win_both 0 0
## 367 mom_to_mt_rainier_for 0 0
## 368 cast_your_vote_and_leave 1 1
## 369 keep_it_real_this_week 1 1
## 370 tiny_bite_taken_on_the 1 1
## 371 tonight_need_to_celebrate_winning 0 0
## 372 the_river_kwai_on_tcm 0 0
## 373 going_to_portland_for_a 1 1
## 374 are_probably_a_couple_of 1 1
## 375 of_casino_and_wiseguys_which 0 0
## 376 late_laugh_yayeeee_to_tomorrow 0 0
## 377 you_all_about_the_jungle 0 0
## 378 act_for_failing_to_give 0 0
## 379 pretty_simple_dishes_and_pretty 0 0
## 380 deals_are_for_high-end_hotels 0 0
## 381 honors_them_day_in_and 1 1
## 382 with_win_over_warriors_and 1 1
## 383 i_don't_eat_a_lot 1 1
## 384 service_side_of_the_project 0 0
## 385 the_number_of_guests_horses 0 0
## 386 my_husbands_photos_laugh_in 0 0
## 387 feat_more_arduous_than_that 1 1
## 388 of_wife_mother_and_successful 0 0
## 389 business_from_the_concertgoers_and 0 0
## 390 early_things_get_going_at 0 0
## 391 our_film_premiered_next_door 0 0
## 392 reply_to_this_enter_for 0 0
## 393 and_we_got_to_enjoy 0 0
## 394 of_the_denver_broncos_mark 0 0
## 395 class_who_sacrificed_themselves_to 1 1
## 396 sure_you_can_see_where 0 0
## 397 documents_for_the_legal_case 0 0
## 398 connected_to_the_east_coast 1 1
## 399 men_with_both_conditions_don't 0 0
## 400 margaret_singley_a_daughter_of 1 1
## 401 immediately_respond_to_a_request 1 1
## 402 discussing_the_ruling_with_engzell 0 0
## 403 a_potential_matchup_in_vegas 0 0
## 404 with_at_or_not_hm 0 0
## 405 appear_on_tv_talk_shows 0 0
## 406 kennedy_a_harvard_law_school 1 1
## 407 there_is_great_love_there 0 0
## 408 be_able_to_get_someone 0 0
## 409 because_you_either_trust_me 1 1
## 410 i'm_pretty_sure_i_was 1 1
## 411 to_my_school's_fun_fair 0 0
## 412 exclusively_on_our_facebook_page 0 0
## 413 album_of_the_year_also 0 0
## 414 relates_to_her_the_way 1 1
## 415 efforts_have_also_yielded_additional 0 0
## 416 it's_not_personal_it's_about 0 0
## 417 if_there_was_deliberate_social 0 0
## 418 a_lot_of_toppings_on 0 0
## 419 penitentiary_in_kansas_and_later 0 0
## 420 nancy_leigh_demoss_mary_kassian 0 0
## 421 to_encourage_the_well-being_of 0 0
## 422 entering_the_country_including_asylum-seekers 0 0
## 423 humans_human_is_precisely_that 1 1
## 424 a_cleaner_better_written_code 0 0
## 425 of_the_quarterback_studs_and 0 0
## 426 gentle_words_stopped_him_at 0 0
## 427 her_to_stay_out_of 1 1
## 428 to_a_level_i_think 1 1
## 429 of_singhvi_the_media_protected 0 0
## 430 making_and_there's_very_little 1 1
## 431 legislator_on_the_city_council 1 1
## 432 to_keep_it_that_way 1 1
## 433 wakile_mother_to_teenagers_victoria 0 0
## 434 good_beer_will_be_harmed 0 0
## 435 have_made_headlines_and_unnerved 0 0
## 436 to_cut_into_amazon's_sales 0 0
## 437 only_a_couple_of_gagging 0 0
## 438 of_the_journey_as_long 1 1
## 439 around_or_handled_by_nephews 0 0
## 440 federal_investigators_probing_the_san 0 0
## 441 somewhere_and_try_but_i 1 1
## 442 a_home_run_for_the 1 1
## 443 a_class_action_lawsuit_concerning 0 0
## 444 after_reading_it_through_twice 0 0
## 445 news_conference_beside_what_was 0 0
## 446 imagine_the_raiders_chances_last 0 0
## 447 kindness_with_the_world_the 1 1
## 448 mistakes_to_know_the_path 0 0
## 449 in_a_sold_out_building 0 0
## 450 first_millvale_game_dispute_the 1 1
## 451 up_today_will_the_worst 0 0
## 452 lasers_tattoo_removal_oh_my 1 1
## 453 had_outstanding_depth_over_the 1 1
## 454 by_comparison_has_been_a 1 1
## 455 veneer_of_seemingly_endless_riff 0 0
## 456 group_who_said_he_quit 0 0
## 457 we're_already_getting_feedback_and 1 1
## 458 it_the_language_and_tragic 0 0
## 459 and_went_on_to_capture 0 0
## 460 need_to_understand_two_crucial 0 0
## 461 sell_and_post_around_town 1 1
## 462 one_meet_where_i_could've 0 0
## 463 police_blockade_of_the_bank 0 0
## 464 have_to_do_a_good 1 1
## 465 pitchers_acquire_a_visa_mlb 0 0
## 466 so_reminiscent_but_not_quite 1 1
## 467 lower_than_i_thought_it 1 1
## 468 a_class_for_a_team 0 0
## 469 conventional_dating_methods_and_do 0 0
## 470 ball_back_to_me_and 1 1
## 471 is_because_that_is_usually 0 0
## 472 awesome_next_one_i'm_kidnapping 0 0
## 473 lagos_he_had_been_beaten 0 0
## 474 and_shameless_and_the_da's 0 0
## 475 edx_project_will_include_not 0 0
## 476 you_agree_a_guy_with 1 1
## 477 come_home_from_school_eat 0 0
## 478 that_he_had_used_the 1 1
## 479 said_that_the_sale_is 1 0
## 480 or_i'm_gonna_freaking_throw 0 0
## 481 enlisted_several_maintenance_workers_to 1 1
## 482 of_prostate-specific_antigen_or_psa 0 0
## 483 emerging_markets_and_higher_retail 0 0
## 484 also_usually_the_high_maintenance 0 0
## 485 some_tulle_that_i_burned 0 0
## 486 fork_to_mix_the_rice 0 0
## 487 the_tragedy_in_these_poems 0 0
## 488 effectively_about_your_chosen_profession 0 0
## 489 and_when_things_start_to 1 1
## 490 their_individual_business_goals_while 0 0
## 491 so_i'm_the_park_letting 0 0
## 492 high_profile_coach_for_free 0 0
## 493 political_historians_in_the_county 0 0
## 494 dear_captcha_yes_i'm_a 1 1
## 495 ban_on_small_arms_back-door 0 0
## 496 time_and_the_ginormous_grin 0 0
## 497 movie_but_we_can_only 1 1
## 498 control_faster_than_those_who 1 1
## 499 anyone_laughs_he_starts_pretty 0 0
## 500 switching_from_a_simple_oval 0 0
## 501 mortgages_they're_often_linked_to 1 1
## 502 smashwords_for_a_free_copy 1 1
## 503 is_another_way_to_protect 0 0
## 504 for_most_likely_the_rest 0 0
## 505 see_how_they_turn_things 0 0
## 506 to_raise_the_money_needed 0 0
## 507 to_witness_these_two_on 0 0
## 508 spoil_a_perfectly_good_story 0 0
## 509 president_of_dollar_deals_world 0 0
## 510 rays_of_computer_monitor_coupled 0 0
## 511 a_deadly_mixed-martial_arts_fighter 0 0
## 512 it_is_the_sheer_concentration 0 0
## 513 after_a_few_rollercoaster_days 0 0
## 514 father_who_wanted_him_to 1 1
## 515 response_times_are_too_slow 0 0
## 516 wonder_if_someone_took_my 1 1
## 517 kingpin_who_had_recruited_their 0 0
## 518 conflict-free_world_he_depicted_in 0 0
## 519 city_council_has_narrowly_rejected 0 0
## 520 blessed_to_work_we_have 1 1
## 521 they_stand_behind_the_man 0 0
## 522 so_i_don't_know_what 1 1
## 523 behaviour_that_one_can't_really 0 0
## 524 the_more_power_can_flow 0 0
## 525 would_chomp_on_them_happily 0 0
## 526 i_decided_to_go_to 1 1
## 527 much_told_charlie_he_would 0 0
## 528 love_the_ipad_mobile_site 0 0
## 529 playing_football_the_next_day 1 1
## 530 running_than_its_generals_as 0 0
## 531 photographs_in_order_to_protect 0 0
## 532 can_you_please_renane_your 0 0
## 533 despite_her_good_grades_strong 0 0
## 534 maviglio_shares_the_cahhc's_tin 0 0
## 535 the_while_feeling_important_because 0 0
## 536 nose_dived_out_of_contention 0 0
## 537 use_the_older_calendar_gathered 0 0
## 538 tornado_warning_a_few_weeks 1 1
## 539 on_the_season_my_friend 0 0
## 540 thats_the_song_you_should 0 0
## 541 waiting_anywhere_so_that_waiting 0 0
## 542 assume_that_his_appointment_will 0 0
## 543 that_prospect_was_none_other 1 1
## 544 the_old_baptists_and_the 1 1
## 545 rundown_of_quality_opponents_defeated 0 0
## 546 journalist_lisa_walter_who_covered 0 0
## 547 the_fourth_round_in_a 1 1
## 548 i_really_hope_they_do 1 1
## 549 of_the_council's_research_and 1 1
## 550 talks_about_memories_of_his 1 0
## 551 warning_or_a_severe_thunderstorm 1 1
## 552 for_the_capslock_it_was 1 1
## 553 boasts_is_an_established_industrial 0 0
## 554 quote_on_my_facebook_timeline 0 0
## 555 the_star_however_is_the 1 1
## 556 learn_to_imagine_what_something 0 0
## 557 he_shares_his_roof_and 1 1
## 558 the_pool_and_back_courtside 0 0
## 559 communications_came_through_on_its 0 0
## 560 funny_said_you_always_use 0 0
## 561 years_of_dedication_to_this 0 0
## 562 use_to_make_a_tomatoes 0 0
## 563 recently_the_central_role_of 1 1
## 564 juror_in_their_case_to 1 1
## 565 the_news_media_in_late 0 0
## 566 ingredients_i_used_were_white 0 0
## 567 he_still_hasn't_gotten_used 1 1
## 568 act_and_other_special_attendees 0 0
## 569 and_energy_and_solutions_germany 0 0
## 570 avila_a_spokesman_there_said 0 0
## 571 more_honest_and_truthful_and 1 1
## 572 effort_to_expedite_the_application 0 0
## 573 being_unsure_of_the_halter 0 0
## 574 paris_hilton_stampeding_towards_a 1 1
## 575 seasonal_vegetables_and_creative_technique 0 0
## 576 peel_finely_sliced_and_covered 0 0
## 577 p.m_disappearing_behind_his_closed 0 0
## 578 but_he_is_now_without 0 0
## 579 the_marlins_through_three_innings 1 1
## 580 more_apt_theme_something_i 1 1
## 581 week's_carnival_of_homeschooling_hosted 0 0
## 582 night_as_she_lay_in 1 1
## 583 out_of_it_in_the 1 1
## 584 month_clothes_and_a_size 0 0
## 585 had_the_second_best_time 0 0
## 586 ll_hate_the_one_and 1 1
## 587 chance_to_give_the_mia 0 0
## 588 artists_that_work_with_us 1 1
## 589 so_it_came_to_be 1 1
## 590 is_once_again_enrolled_at 1 1
## 591 tonight_the_new_james_bond 1 1
## 592 beadbacking_that_i_wanted_to 1 1
## 593 him_to_a_big_boy 0 0
## 594 is_and_there_are_no 1 1
## 595 you_prefer_a_word_derived 0 0
## 596 from_the_njdep_and_other 0 0
## 597 as_one_who_does_not 1 1
## 598 not_understand_where_they_were 1 1
## 599 maria_lost_her_footing_at 0 0
## 600 has_decided_you_will_work 0 0
## 601 the_store_getting_donuts_and 1 1
## 602 if_you're_not_using_it 1 1
## 603 for_business_writing_for_administrative 0 0
## 604 a_mission_that_combined_armored 0 0
## 605 least_for_the_next_week 1 1
## 606 and_check_out_our_music 1 1
## 607 to_tilt_the_scales_of 1 1
## 608 pointed_out_iraq_has_a 1 1
## 609 based_on_these_comments_we 0 0
## 610 challenge_myself_but_will_still 0 0
## 611 their_child_and_suffer_the 1 1
## 612 up_with_are_flashes_of 1 1
## 613 needs_job_growth_now_even 0 0
## 614 alternative_clothes_and_footwear_shop 0 0
## 615 feel_like_going_to_walmart 0 0
## 616 of_godly_marriages_in_our 0 0
## 617 in_karate_competitions_and_talked 0 0
## 618 which_we_get_a_sense 0 0
## 619 outdoor_floodlights_that_cast_a 1 1
## 620 that's_a_good_thing_right 1 1
## 621 is_unable_to_play_coach 0 0
## 622 to_try_a_scene_again 0 0
## 623 or_pointed_in_the_right 0 0
## 624 since_the_first_week_of 1 1
## 625 best_pull_from_a_pack 0 0
## 626 curious_as_to_what_i 1 1
## 627 too_and_like_many_others 1 1
## 628 in_the_park_while_i 1 1
## 629 pregnant_and_wanted_to_have 0 0
## 630 the_world_map_that_shows 0 0
## 631 national_agenda_france's_pursuit_of 1 1
## 632 has_changed_my_life_because 0 0
## 633 susanville_lassen_county_which_she 0 0
## 634 from_abq_hp_for_giving 0 0
## 635 effort_is_expected_to_pay 0 0
## 636 a_nice_little_round_of 1 1
## 637 much_of_the_region_during 0 0
## 638 rub_it_off_it_left 0 0
## 639 a_bike_lane_to_the 1 1
## 640 out_on_a_stretcher_through 0 0
## 641 with_them_for_a_few 1 1
## 642 traditions_and_we_softly_call 0 0
## 643 like_we_fixed_the_the 0 0
## 644 by_living_a_life_first 0 0
## 645 i_can_improve_my_game 0 0
## 646 team_getting_ready_for_the 1 1
## 647 excited_to_work_at_a 1 1
## 648 happier_with_their_bank_branches 0 0
## 649 of_the_way_he_was 1 1
## 650 was_not_in_order_but 0 0
## 651 our_findings_are_a_snapshot 0 0
## 652 at_a_fast_pace_mr 0 0
## 653 this_club_decided_to_give 0 0
## 654 do_something_you_try_to 1 1
## 655 recent_efforts_to_rewrite_an 0 0
## 656 shared_home_of_the_giants 1 1
## 657 film_it's_not_exactly_what 1 1
## 658 sun_finally_came_out_for 0 0
## 659 apartments_will_be_at_the 1 1
## 660 of_leveraging_investment_in_space 0 0
## 661 talk_about_why_i_believe 0 0
## 662 list_stick_to_it_unless 0 0
## 663 my_social_life_is_lived 0 0
## 664 succeed_is_second_to_none 1 1
## 665 any_turtles_might_appear_on 1 1
## 666 you_strapped_wit_an_iphone 0 0
## 667 list_of_environmental_groups_including 1 1
## 668 holding_a_meeting_to_determine 0 0
## 669 helped_shroud_the_work_of 1 1
## 670 have_reproductive_toxicity_and_are 0 0
## 671 very_satisfying_ask_me_tomorrow 0 0
## 672 the_highway_buses_and_nobody 0 0
## 673 he_was_inside_his_vehicle 0 0
## 674 this_beautiful_backyard_on_the 1 1
## 675 gardens_officials_could_not_be 1 1
## 676 may_weigh_in_as_well 1 1
## 677 another_person_who_was_present 0 0
## 678 previous_appeal_disposed_of_his 0 0
## 679 the_game_and_if_he 0 0
## 680 the_british_mandate_lands_which 0 0
## 681 students_picked_for_the_gates 0 0
## 682 and_i_hope_you_follow 0 0
## 683 will_pretty_much_convince_you 0 0
## 684 by_attacking_the_president_and 1 1
## 685 catherine_is_the_most_realistic 0 0
## 686 ask_jeff_if_he's_as 0 0
## 687 huge_oil_shortage_has_created 0 0
## 688 prettygoodwithwords_is_real_and_not 0 0
## 689 with_the_sun_at_the 1 1
## 690 district_delegates_or_delegates_from 0 0
## 691 the_other_end_it_stops 0 0
## 692 by_leveraging_their_assessment_money 0 0
## 693 head_of_state_her_countrywoman 0 0
## 694 is_not_into_getting_up 0 0
## 695 to_hug_to_rejoice_with 0 0
## 696 be_of_the_mindset_i 0 0
## 697 spend_whatever_it_wants_to 1 1
## 698 dropped_off_so_she_called 0 0
## 699 tbh_i_dont_want_summer 0 0
## 700 can_live_many_lives_through 0 0
## 701 former_michigan_state_wide_receiver 1 1
## 702 no_cellie_yesterday_so_i'm 0 0
## 703 will_be_achieved_by_using 1 1
## 704 left_and_went_to_the 1 1
## 705 a_different_person_a_better 0 0
## 706 how_does_he_keep_me 0 0
## 707 both_the_philosophy_and_psychology 0 0
## 708 their_first-round_series_in_the 0 0
## 709 rio_city_cafe_owner_bill 0 0
## 710 room_chronic_health_problems_that 1 1
## 711 subtle_inclusion_of_foreign_relations 0 0
## 712 people_who_like_him_seem 0 0
## 713 to_switch_the_schedules_they 0 0
## 714 didn't_match_her_image_of 1 1
## 715 the_western_conference_finals_if 0 0
## 716 your_exit_or_missed_your 0 0
## 717 unfamiliar_with_the_tune_enjoyed 0 0
## 718 appear_to_have_been_any 0 0
## 719 all_of_our_reflexes_and 0 0
## 720 standing_knee_deep_enjoying_the 1 1
## 721 re_financial_systems_analyst_or 0 0
## 722 think_you'll_hear_back_from 1 1
## 723 now_in_control_of_the 1 1
## 724 you_are_cooking_the_rice 0 0
## 725 all_sizes_from_small_to 0 0
## 726 for_neal's_newest_dvd_releases 0 0
## 727 camp_at_a_municipal_campground 0 0
## 728 does_anyone_know_of_a 1 1
## 729 whirlpool_for_hydrotherapy_muscle-stress_relief 0 0
## 730 a_sobering_reminder_that_life 1 1
## 731 frontier_is_a_book_you 0 0
## 732 a_distorted_picture_of_a 1 1
## 733 ty_drove_the_bus_and 1 1
## 734 intent_of_the_legislation_was 1 1
## 735 didn't_realize_it_would_be 1 1
## 736 have_a_life_and_your 0 0
## 737 want_my_push_button_dinner 0 0
## 738 imagination_to_compose_adventures_stories 0 0
## 739 cut_or_freeze_some_unused 0 0
## 740 of_the_left_foot_of 1 1
## 741 appear_in_the_illusionists_at 0 0
## 742 has_there_ever_been_that 0 0
## 743 may_be_on_his_way 1 1
## 744 bands_with_an_amazing_friend 0 0
## 745 basically_all_this_entails_is 0 0
## 746 do_you_get_a_day 0 0
## 747 and_a_plastic_button_for 1 1
## 748 an_extra_sign_might_want 1 1
## 749 come_back_hear_from_my 0 1
## 750 when_kids_see_a_toy 0 0
## 751 chicago_said_king_should_focus 0 0
## 752 is_numbingly_predictable_as_if 0 0
## 753 pointedly_refused_to_invite_three 0 0
## 754 it's_my_gameday_good_luck 1 1
## 755 habitat_for_humanity_international_in 0 0
## 756 between_a_man_a_woman 0 0
## 757 word_macguffin_and_i'm_really 0 0
## 758 over_time_perhaps_sparing_current 0 0
## 759 far_and_a_sure_bet 1 1
## 760 classical_mythology_in_particular_children 0 0
## 761 large_sections_omitted_because_of 1 1
## 762 daughter_to_get_some_ideas 0 0
## 763 very_well_still_call_it 1 1
## 764 top_not_to_many_of 1 1
## 765 her_self-esteem_what_made_it 0 0
## 766 trimmed_the_drapes_in_perfectly-coordinated 0 0
## 767 some_of_the_final_postures 0 0
## 768 thirteen_chilling_tales_from_the 1 1
## 769 so_many_downers_have_been 1 1
## 770 sustainable_practices_through_its_support 0 0
## 771 beat_ourselves_on_a_lot 1 0
## 772 no_signs_and_even_fill 0 0
## 773 idea_didn't_work_out_as 1 1
## 774 great_success_and_you_can 1 1
## 775 rather_than_sling_hoses_across 0 0
## 776 want_to_ease_yourself_in 1 1
## 777 many_families_no_one_in 0 0
## 778 parent_is_what_my_children 0 0
## 779 school_and_went_on_to 1 1
## 780 pull_out_a_come-from-behind_win 0 0
## 781 parasitic_worms_or_helminths_which 0 0
## 782 and_last_daughter_and_in 0 0
## 783 pretty_much_about_to_drop 0 0
## 784 to_jazz_it_up_for 1 1
## 785 goes_down_i_believe_in 1 1
## 786 out_of_hades_and_headed 0 0
## 787 sure_they_were_implemented_exceedingly 0 0
## 788 of_the_old_field_meadow 0 0
## 789 the_high-stakes_political_maneuvering_by 0 0
## 790 statehouse_for_recognition_by_the 1 1
## 791 piece_an_added_dimension_if 0 0
## 792 i_think_you_could_just 0 0
## 793 away_with_byrd_said_in 1 1
## 794 city_crowd_gets_to_rediscover 0 0
## 795 off_and_landing_larger_planes 0 0
## 796 his_game_has_steadily_grown 0 0
## 797 on_jobs_and_retail_sales 1 1
## 798 shaking_his_head_recoiling_at 0 0
## 799 on_or_this_storm_don't 0 0
## 800 there_were_some_weeks_i 0 0
## 801 ceiling_crisis_of_the_sort 0 0
## 802 myself_ok_then_if_he 0 0
## 803 but_not_hoin_to_bed 0 0
## 804 they_wanted_to_y'all're_probably 0 0
## 805 this_guy_is_going_to 1 1
## 806 to_get_to_hang_out 1 1
## 807 private_practice_have_to_address 0 0
## 808 twice_in_one_week_for 0 0
## 809 to_several_articles_there's_a 1 1
## 810 others_worried_that_the_hospital 0 0
## 811 they_arrive_at_our_door 0 0
## 812 ipod_touch_with_me_and 1 1
## 813 annoying_commercials_i_forget_which 0 0
## 814 a_comment_about_me_being 0 0
## 815 the_brewers_come_into_tonight's 0 0
## 816 restaurant_and_asked_to_store 0 0
## 817 the_hospital_random_science_museum 0 0
## 818 me_happy_and_i_need 0 0
## 819 or_perhaps_after_labor_day 1 1
## 820 rate_is_expected_to_rise 0 0
## 821 his_internship_at_ohio_historical 0 0
## 822 a_virtue_that_comes_from 1 1
## 823 are_so_ready_for_this 1 1
## 824 begin_offering_the_service_in 0 0
## 825 be_freshly_pressed_and_as 0 0
## 826 industry_is_a_good_thing 1 1
## 827 documents_for_four_areas_of 1 1
## 828 which_isn't_just_in_sunderland 0 0
## 829 made_national_headlines_in_one 1 1
## 830 got_there_russo_began_to 1 1
## 831 award_for_so_many_reasons 1 1
## 832 they_were_treated_at_the 1 1
## 833 are_well_come_to_come 0 0
## 834 their_experiences_in_interviews_anderson 0 0
## 835 time_if_the_agreement_is 1 1
## 836 but_now_on_to_the 1 1
## 837 i_had_crumb_cupcakes_for 1 1
## 838 to_become_the_first_team 1 1
## 839 couldn't_be_an_unbiased_juror 0 0
## 840 teaching_her_a_lesson_not 0 0
## 841 has_gone_on_for_so 0 0
## 842 day_i_emailed_my_former 0 0
## 843 far_from_his_west_los 0 0
## 844 memorial_and_if_not_for 1 1
## 845 from_girl_shot_at_washington 0 0
## 846 players_broke_ncaa_rules_by 0 0
## 847 to_double_the_relative_risk 0 0
## 848 going_anywhere_the_guy_was 0 0
## 849 generation_after_the_family_member 0 0
## 850 get_a_little_bit_of 1 1
## 851 like_sopes_and_gorditas_remain 0 0
## 852 which_is_difficult_to_do 0 0
## 853 go_upscale_and_what_the 0 0
## 854 have_high_cholesterol_or_heart 0 0
## 855 maintained_throughout_the_trials_that 0 0
## 856 happened_when_a_nurse_inaccurately 0 0
## 857 school_don't_have_to_se 0 0
## 858 and_numbers_released_wednesday_show 0 0
## 859 land_the_aircraft_carrying_nearly 0 0
## 860 its_joys_and_its_sorrows 0 0
## 861 incredibly_excited_to_have_a 1 1
## 862 to_download_albums_by_bands 0 0
## 863 the_centre_had_a_footfall 0 0
## 864 i_started_posting_on_here 0 0
## 865 the_alphabet_i'd_put_you 0 0
## 866 enchanted_is_an_adorable_novel 0 0
## 867 played_his_full_potential_at 0 0
## 868 three_galleries_and_two_bookstores 0 0
## 869 have_few_problems_assimilating_into 0 0
## 870 he_is_teetering_on_the 1 1
## 871 million_in_earnings_last_year 1 1
## 872 back_and_laugh_at_ourselves 0 0
## 873 of_knowing_if_that_is 1 1
## 874 the_way_they_choose_their 0 0
## 875 as_tenants_rights_and_housing 0 0
## 876 a_double_concerto_for_timpani 0 0
## 877 see_where_their_strengths_and 1 1
## 878 anything_about_him_from_this 0 0
## 879 it_was_met_by_law 0 0
## 880 element_an_elementary_school_fair 0 0
## 881 diligently_to_learn_thanks_to 1 1
## 882 here's_what_i've_received_recently 0 0
## 883 executives_was_the_first_leader 0 0
## 884 to_be_expected_as_the 1 1
## 885 he_says_he's_going_to 1 1
## 886 whom_ift_recently_promoted_to 1 1
## 887 i_saw_that_too_shocked 0 0
## 888 his_back_and_nothing_could 0 0
## 889 largest_group_of_fmv_packages 0 0
## 890 increasing_danger_of_not_entering 0 0
## 891 win_by_minnesota_was_the 1 1
## 892 that_with_these_guys_because 0 0
## 893 the_deals_select_from_small 0 0
## 894 at_it_like_i'm_from 0 0
## 895 just_savoring_the_pristine_and 0 0
## 896 it_and_she_can_barely 0 0
## 897 of_what_you_must_know 1 1
## 898 make_sure_the_lawn_it 0 0
## 899 over_they_had_even_more 1 1
## 900 in_many_varieties_and_cultivars 0 0
## 901 money_for_next_year_on 0 0
## 902 it's_not_surprising_that_there 0 0
## 903 looking_forward_to_playing_here 0 0
## 904 relentless_in_his_pursuit_to 0 0
## 905 and_one_month_after_their 0 0
## 906 a_growing_unease_about_the 1 1
## 907 exam_rooms_are_all_equipped 0 0
## 908 nonsmoker_and_retired_electrical_engineer 1 1
## 909 conversion_was_a_blasphemer_persecutor 0 0
## 910 can_get_messy_and_complicated 0 0
## 911 of_imagination_an_essential_and 0 0
## 912 will_use_these_type_of 1 1
## 913 for_kershaw_or_halladay_or 0 0
## 914 the_cut-off_time_for_same 0 0
## 915 president_and_congress_both_of 1 1
## 916 the_deer_then_crashed_into 1 1
## 917 broken_every_couple_of_hours 1 1
## 918 wiles_admitted_that_he_handled 0 0
## 919 of_the_time_you're_actually 0 0
## 920 grants_because_planned_parenthood_was 0 0
## 921 arguments_presume_that_one_can 1 1
## 922 see_it_on_a_different 0 0
## 923 get_are_colors_bars_page 0 0
## 924 conner_ford_finished_second_in 1 1
## 925 in_the_rochester_mn_area 0 0
## 926 to_have_quite_a_few 1 1
## 927 it's_happening_now_all_white 0 0
## 928 until_of_course_the_movie 0 0
## 929 a_cream_with_it_and 1 1
## 930 will_have_a_greater_impact 0 0
## 931 pimlico_on_tuesday_to_get 1 1
## 932 lovely_ladies_pointed_out_the 1 1
## 933 don't_forget_that_small_changes 0 0
## 934 little_opening_where_the_barn 0 0
## 935 fry_up_a_batch_without 0 0
## 936 mag_who's_comin_with_me 0 0
## 937 anti-proliferation_goals_it_got_the 0 0
## 938 these_little_glass_spice_jars 0 0
## 939 level_and_longingly_look_at 1 1
## 940 right_up_into_your_armpit 0 0
## 941 want_to_protect_her_wishing 0 0
## 942 you_stink_up_the_entire 0 0
## 943 then_sit_on_you_again 0 0
## 944 within_every_tear-_there's_love 0 0
## 945 a_critic_in_lakewood_ohio 0 0
## 946 back_kenjon_barner_and_linebacker 0 0
## 947 also_a_sophomore_said_of 0 0
## 948 and_the_sports_they_might 0 0
## 949 table_next_to_him_siting 0 0
## 950 south_korea_and_a_shot 0 0
## 951 cutting_edge_of_modern_fascist 0 0
## 952 the_original_pollock_executive_chair 0 0
## 953 of_my_friends_that_i 1 1
## 954 everything_just_to_see_your 0 0
## 955 different_environmental_pressures_will_result 0 0
## 956 trade_decided_to_take_their 0 0
## 957 life_is_so_precious_and 1 1
## 958 only_tired_in_the_morning 0 1
## 959 have_to_go_back_to 1 1
## 960 will_probably_get_taken_for 0 0
## 961 care_of_myself_more_wear 0 0
## 962 synchronized_swimming_to_boxing_to 0 0
## 963 over_yonder_in_kiwi_country 0 0
## 964 and_reacted_with_a_loud 0 0
## 965 many_food_specials_as_halloween 0 0
## 966 led_to_believe_a_significant 0 0
## 967 members_like_to_chew_on 1 1
## 968 models_have_shown_to_be 1 1
## 969 as_a_result_as_was 0 0
## 970 wrap_the_motherly_cloak_about 0 0
## 971 of_those_parts_fall_under 0 0
## 972 for_anyone_wanting_a_piercing 0 0
## 973 about_our_ovaries_and_he 0 0
## 974 he_sings_to_me_thought 0 0
## 975 sharply_curtailed_harvests_on_federal 0 0
## 976 very_essence_is_part_of 1 1
## 977 probably_different_but_i_think 1 1
## 978 re_here_today_to_close 0 0
## 979 a_shelter_established_by_the 1 1
## 980 book_was_as_much_about 0 0
## 981 of_making_me_excited_it 0 0
## 982 the_library_doesn't_have_its 0 0
## 983 mary_ann_lives_in_the 1 1
## 984 other_buses_were_driving_by 1 1
## 985 jackie_i_know_nick_was 0 0
## 986 doubt_you_could_say_they 0 0
## 987 being_successful_isn't_magic_is 1 1
## 988 my_family_pillutla_said_and 0 0
## 989 stars_and_routes_of_precession 0 0
## 990 number_will_go_up_significantly 0 0
## 991 when_you_get_tired_of 1 1
## 992 i've_seen_of_this_technology 0 0
## 993 i'm_already_completely_and_madly 0 0
## 994 if_the_bullying_continues_a 0 0
## 995 have_a_proper_poured_cement 0 0
## 996 i_have_thought_about_calling 1 0
## 997 more_than_anything_were_always 0 0
## 998 do_not_think_it_means 1 1
## 999 meditate_and_do_it_daily 0 0
## 1000 brisket_there_are_still_many 1 1
# Evaluation summary: total number of predictions, then catches, accuracy and
# mean per-prediction time for the two methods labelled "Interpolation" and
# "Kneser-Ney Smoothing". The `## [1] ...` lines are output recorded from a
# previous run (where length.CV was 1000).
# NOTE(review): assumes time.taken and time.taken.kn hold the total elapsed
# seconds across all length.CV predictions -- confirm where they are measured
# (a difftime can auto-select units other than seconds).
print(paste0("Total Predictions: ", length.CV))
## [1] "Total Predictions: 1000"
print(paste0("Total Catches (Interpolation): ", total.catches))
## [1] "Total Catches (Interpolation): 330"
print(paste0("Accuracy (Interpolation): ", accuracy))
## [1] "Accuracy (Interpolation): 0.33"
# Average over the actual number of predictions instead of a hard-coded 1000,
# so the figure stays correct if the evaluation set size changes.
print(paste0("Average Time Taken per Prediction in seconds (Interpolation): ", time.taken / length.CV))
## [1] "Average Time Taken per Prediction in seconds (Interpolation): 0.446346651855469"
print(paste0("Total Catches (Kneser-Ney Smoothing): ", total.catches.kn))
## [1] "Total Catches (Kneser-Ney Smoothing): 328"
print(paste0("Accuracy (Kneser-Ney Smoothing): ", accuracy.kn))
## [1] "Accuracy (Kneser-Ney Smoothing): 0.328"
# Same divisor as above: the number of predictions actually made.
print(paste0("Average Time Taken per Prediction in seconds (Kneser-Ney Smoothing): ", time.taken.kn / length.CV))
## [1] "Average Time Taken per Prediction in seconds (Kneser-Ney Smoothing): 0.51984421484375"