Bruno González
Nov 22nd, 2018
#' Tokenize text into lower-case words or word n-grams
#'
#' Strips punctuation and digits from `dat`, splits on runs of whitespace,
#' flattens everything into one vector and lower-cases it. With `n > 1`,
#' each word is joined with the words that follow it (space separated).
#'
#' @param dat Character vector of text (one or more strings).
#' @param n Size of the n-gram; values of 1 or less return single words.
#' @return A character vector with one element per word found in `dat`
#'   (`character(0)` when there are none). For `n > 1` the trailing
#'   elements run past the end of the text and are padded with the literal
#'   text "NA", so the result has the same length for every `n`.
ftoken <- function(dat, n = 1) {
  words <- gsub("[[:punct:]]", "", dat)
  words <- gsub("[[:digit:]]", "", words)
  words <- tolower(unlist(strsplit(words, "\\s+")))

  # Removing leading punctuation/digits leaves leading whitespace, which
  # strsplit() turns into an empty first token; those are not words.
  words <- words[is.na(words) | nzchar(words)]

  # Nothing to combine: avoid fabricating "NA NA" n-grams from empty input.
  if (length(words) == 0L) {
    return(character(0))
  }
  if (n <= 1) {
    return(words)
  }

  # One shifted copy of the token vector per n-gram position. Indexing past
  # the end yields NA, which paste() renders as "NA" (the padding noted above).
  shifts <- c(0, 1:(n - 1))
  shifted <- lapply(shifts, function(k) words[seq_along(words) + k])
  do.call(paste, shifted)
}
#' Build a relative-frequency table from two parallel token vectors
#'
#' Pairs `tokn` and `tokn1` row by row, counts how many rows share each
#' `tokn` (`freqt`), and for every `tokn1` computes
#' `freq = (rows sharing that tokn1) / freqt`. The result has one row per
#' `tokn1` holding the first `tokn` seen with it and the largest `freq`;
#' rows with `freq` of 0.01 or less are dropped.
#'
#' @param tokn Vector of tokens; must have the same length as `tokn1`.
#' @param tokn1 Vector of tokens paired element-wise with `tokn`.
#' @return (Invisibly) a data frame with columns `tokn1`, `tokn`, `freq`.
fmatpred <- function(tokn, tokn1) {
  pairs <- data.frame(tokn, tokn1)

  # Number of rows sharing each tokn.
  by_tokn <- group_by(pairs, tokn)
  counted <- ungroup(mutate(by_tokn, freqt = n()))

  # Rows sharing each tokn1, relative to the tokn count on the same row.
  by_tokn1 <- group_by(counted, tokn1)
  scored <- mutate(by_tokn1, freq = n() / freqt)

  collapsed <- summarize(scored, tokn = nth(tokn, 1), freq = max(freq))
  kept <- filter(collapsed, freq > 0.01)

  # The value is returned invisibly, matching the previous behaviour.
  invisible(kept)
}
#' Keep the highest-frequency row(s) for each `tokn`
#'
#' Groups `l` by `tokn`, adds a `rank` column (rank of `freq` in descending
#' order, ties averaged) and keeps the rows holding the top `freq` within
#' each group. Rows tied for the top are all kept.
#'
#' @param l Data frame with at least the columns `tokn` and `freq`.
#' @return (Invisibly) `l` grouped by `tokn`, with the added `rank` column,
#'   reduced to the top-ranked row(s) of every group.
ffunction <- function(l) {
  # The `rank` column keeps average ties (e.g. 1.5 for a two-way tie), but
  # filtering on it with `rank < 2` would discard every row of a group where
  # three or more rows tie for first place (they all get rank >= 2). The
  # filter therefore uses a min-ties rank, under which all top rows get 1.
  best <- l %>%
    group_by(tokn) %>%
    mutate(rank = rank(desc(freq))) %>%
    filter(base::rank(desc(freq), ties.method = "min") < 2)

  # The value is returned invisibly, matching the previous behaviour.
  invisible(best)
}