# Build the anchored regular expression "^minister$" from a node word in
# several equivalent ways. Console output is kept as `#>` comments so that
# the file stays valid R and can be sourced.
node <- "minister"

# Nested paste() calls, each with an empty separator.
paste("^", paste(node, "$", sep = ""), sep = "")
#> [1] "^minister$"

# A single paste() call with an empty separator.
paste("^", node, "$", sep = "")
#> [1] "^minister$"

# paste0() is paste() with sep = "".
paste0("^", node, "$")
#> [1] "^minister$"

# On a character vector paste0() works element-wise; `collapse` is what
# joins the elements into a single string.
strlst <- c("^", node, "$")
paste0(strlst)
#> [1] "^" "minister" "$"
paste0(strlst, collapse = "")
#> [1] "^minister$"
# grepl() returns a logical vector with one element per input token.
# NOTE: `tmp2` and `G7.data` are created further down in this file, so those
# sections have to be run before this one.
# Console output is kept as `#>` comments so that the file stays valid R.
grepl("to", tmp2$token, ignore.case = TRUE)
#> [1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#> [15] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
#> [29] FALSE FALSE

# Keep every token row whose token contains "war" (any case), then count the
# matching rows per document id.
res.grep <- G7.data[grep("war", G7.data$token, ignore.case = TRUE), ]
aggregate(res.grep$token, by = list(res.grep$doc_id), length)
# Word-level NRC sentiment scores for a single example sentence (syuzhet).
library(syuzhet)

sample_sentence <- "Don't procrastinate on your assignments; managing your time effectively will help you avoid the stress of a rush deadline."

# Tokenise the sentence; the outer parentheses also print the tokens.
(wordLst <- get_tokens(sample_sentence))

# Score every token against the NRC lexicon.
senti_scores <- get_nrc_sentiment(wordLst)

# Look at the current row names, reset them, then label each row with its
# token. make.unique() de-duplicates repeated tokens because data frame row
# names must be unique.
rownames(senti_scores)
rownames(senti_scores) <- NULL
rownames(senti_scores) <- make.unique(wordLst)

# Open the labelled score table in the data viewer.
View(senti_scores)
# Load the BBC article, tokenise it, and search the tokens for the node word.
library(syuzhet)
BBC.string <- get_text_as_string("G7/BBC.txt")
BBC.words <- get_tokens(BBC.string)
head(BBC.words)

# Tokens containing "minist" anywhere; value = TRUE returns the matching
# tokens instead of their positions. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
grep("minist", BBC.words, value = TRUE)

# The same kind of search driven by a variable.
node <- "minister"
grep(node, BBC.words, value = TRUE)

# Anchoring with ^ and $ keeps only tokens that are exactly "minister";
# the outer parentheses print the result as well as storing it.
(nodeLst <- grep("^minister$", BBC.words, value = TRUE))
# Two ways to wrap the node word in ^...$ anchors.
node <- "minister"

# (1) Nested paste() calls: the inner call appends "$", the outer one
#     prepends "^".
paste("^", paste(node, "$", sep = ""), sep = "")

# (2) The same two steps read left to right with the magrittr pipe: the
#     left-hand result is passed as the first argument of the second paste().
library(magrittr)
paste("^", node, sep = "") %>% paste("$", sep = "")
# Build a collocation table: the two tokens to the left and to the right of
# every occurrence of the node word in BBC.words.
node <- "minister"
search_node <- paste0("^", node, "$")

# Positions of the node in the token vector (whole-token, case-insensitive).
(nodeIndex <- grep(search_node, BBC.words, ignore.case = TRUE))

# Take the node column from the very same positions, so it always has exactly
# one entry per hit and cbind() below never has to recycle.
nodeLst <- BBC.words[nodeIndex]

# Token at each position, or NA when the position lies outside the text.
# Plain BBC.words[idx] would silently drop index 0 and would treat a negative
# index as "everything except", misaligning the columns for a hit near the
# start of the text.
word_at <- function(idx) {
  idx[idx < 1 | idx > length(BBC.words)] <- NA
  BBC.words[idx]
}

Left1 <- word_at(nodeIndex - 1)
Left2 <- word_at(nodeIndex - 2)
Right1 <- word_at(nodeIndex + 1)
Right2 <- word_at(nodeIndex + 2)

# One row per hit; printed once with the raw column names ...
(collo <- cbind(Left2, Left1, nodeLst, Right1, Right2))

# ... and once more with short labels and running row numbers.
# seq_len() is safe when there are no hits (seq(0) would give c(1, 0)).
colnames(collo) <- c("L2", "L1", "node", "R1", "R2")
rownames(collo) <- seq_len(nrow(collo))
collo
# Extract a symmetric context window of `size` tokens on each side of every
# node hit: one row per hit, 2 * size + 1 columns, node in the middle column.
size <- 4
# NOTE(review): `len` is not used in this section; it is kept only in case
# code outside this section reads it.
len <- length(BBC.words) - size + 1

# Matrix of token positions: row = hit, column = offset from the hit.
offsets <- seq(-size, size)
window_idx <- outer(nodeIndex, offsets, `+`)

# Positions outside the text become NA so that windows near the start or end
# are padded with NA instead of failing on mixed negative/positive subscripts.
window_idx[window_idx < 1 | window_idx > length(BBC.words)] <- NA

# Look all tokens up in one step instead of growing a matrix with rbind().
colloLst <- matrix(
  BBC.words[as.vector(window_idx)],
  nrow = length(nodeIndex),
  ncol = length(offsets)
)
colloLst
# Read every file in the G7 directory and annotate the texts with cleanNLP.
library(cleanNLP)
dirName <- "G7"
(files <- list.files(dirName))

# file.path() joins the directory and the file names with "/" and returns an
# empty vector when the directory has no files; paste(..., sep = "/") would
# instead produce the bogus path "G7/".
filesDir <- file.path(dirName, files)
filesDir

# Read each file into a single string. get_text_as_string() comes from
# syuzhet, which is loaded earlier in this script.
G7.txtset <- lapply(filesDir, get_text_as_string)

# Run the annotation and take a first look at the token table.
res <- cnlp_annotate(input = G7.txtset)
dim(res$token)
head(res$token)
# Explore grep() on the annotated tokens. TRUE is spelled out everywhere
# because `T` is an ordinary variable that can be reassigned.

# First 30 tokens as a plain vector (printed by the outer parentheses).
(tmp1 <- data.frame(res$token)$token[1:30])
grep("to", tmp1, value = TRUE)                      # matching tokens
grep("to", tmp1, ignore.case = TRUE, value = TRUE)  # ... ignoring case
grep("to", tmp1, ignore.case = TRUE)                # positions of matches

# The same searches on the first 30 rows of the token table.
# NOTE(review): as.data.frame.matrix() is the matrix method called directly;
# as.data.frame(res$token) is probably what is intended -- confirm before
# changing, since the two can differ in how columns are converted.
tmp2 <- as.data.frame.matrix(res$token)[1:30, ]
View(tmp2)
grep("to", tmp2$token, value = TRUE)
grep("to", tmp2$token, ignore.case = TRUE, value = TRUE)
grep("to", tmp2$token, ignore.case = TRUE)
# Use the match positions as row indices to keep the whole rows.
tmp2[grep("to", tmp2$token, ignore.case = TRUE), ]

# Full token table, then every row whose token contains "war" (any case).
G7.data <- as.data.frame.matrix(res$token)
View(G7.data)
G7.data[grep("war", G7.data$token, ignore.case = TRUE), ]