This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
#install.packages("twitteR")
#install.packages("ROAuth")
#install.packages("RCurl")
library(twitteR)
## Warning: package 'twitteR' was built under R version 3.4.4
library(ROAuth)
## Warning: package 'ROAuth' was built under R version 3.4.4
library(RCurl)
## Loading required package: bitops
## [1] "Using direct authentication"
# Extract tweets: fetch up to 300 tweets matching the search term.
# Requires an authenticated twitteR session.
NarendraModi.tweets <- searchTwitter("NarendraModi", n = 300)

# Convert the list of status objects into a data frame (one row per tweet).
# twListToDF() is twitteR's helper for exactly this and fails loudly when the
# list is empty or of mixed classes instead of yielding a malformed object.
df <- twListToDF(NarendraModi.tweets)

# Drop every byte that has no ASCII representation (emoji, Devanagari, ...).
# iconv() is vectorised, so no per-element sapply() is needed.
df$text <- iconv(df$text, "latin1", "ASCII", sub = "")

# Strip URLs. Each URL is matched up to the next whitespace only; the previous
# pattern used a greedy `.*`, which removed all text between the first URL and
# the last ".xyz" in the tweet and left the URL path (e.g. "/abc") behind.
df$text <- gsub("(f|ht)tps?://\\S+", "", df$text)

# Cleaned tweet texts. The variable name is kept for compatibility; note that
# it shares its name with base::sample().
sample <- df$text
# Build the character vector of tweet texts that feeds the text-mining steps.
#
# `df` is rebuilt from the raw tweets here, so any cleaning previously applied
# to df$text (e.g. URL stripping) is NOT carried over into NarendraModi_text.
# NOTE(review): confirm this reset is intended.
df <- twListToDF(NarendraModi.tweets)

# Drop bytes without an ASCII representation. iconv() is vectorised and
# returns a plain (unnamed) character vector; the former sapply() call
# attached each full raw tweet as the element name, which doubled the object
# size and made the raw tweets show up as document ids downstream.
NarendraModi_text <- iconv(df$text, "latin1", "ASCII", sub = "")

# Compactly display the internal structure of the resulting vector.
str(NarendraModi_text)
## Named chr [1:300] "RT @rameshbp: @narendramodi Sir, Need your intervention. Please do not turn a blind eye to the crisis. It is a "| __truncated__ ...
## - attr(*, "names")= chr [1:300] "RT @rameshbp: @narendramodi Sir, Need your intervention. Please do not turn a blind eye to the crisis. It is a "| __truncated__ "RT @WithRavishJi_: <U+091C><U+092C> <U+0909><U+0924><U+094D><U+0924><U+0930><U+093E><U+0916><U+0902><U+0921> <U"| __truncated__ "RT @SuPriyoBabul: The Goons from Minority community came, slaughtered and was given a safe haven to escape.. Th"| __truncated__ "RT @vdsharmabjp: <U+090F><U+0915> <U+0915><U+0926><U+092E> <U+0938><U+094D><U+0935><U+091A><U+094D><U+091B><U+0"| __truncated__ ...
###tm: text mining
library(tm)
## Warning: package 'tm' was built under R version 3.4.4
## Loading required package: NLP
###corpus is a collection of text documents
# Wrap the tweet texts as a vector source (one document per element) and
# build the corpus from it; the temporary source object stays local.
NarendraModi_corpus <- local({
  tweet_source <- VectorSource(NarendraModi_text)
  Corpus(tweet_source)
})
# Show the corpus summary.
print(NarendraModi_corpus)
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 300
# Display the first document of the corpus.
inspect(NarendraModi_corpus[1L])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 1
##
## RT @rameshbp: @narendramodi Sir, Need your intervention. Please do not turn a blind eye to the crisis. It is a matter of livelihood for lak…
## RT @rameshbp: @narendramodi Sir, Need your intervention. Please do not turn a blind eye to the crisis. It is a matter of livelihood for lak
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.4.4
## Loading required package: RColorBrewer
# Clean the corpus before plotting.
#
# 1. Lower-case first: stopwords("english") is all lower-case and removeWords()
#    matches case-sensitively, so capitalised stopwords ("The", "Is", ...)
#    would otherwise survive.
NarendraModi_clean <- tm_map(NarendraModi_corpus, content_transformer(tolower))
# 2. Remove stopwords while punctuation is still present, so contractions in
#    the stopword list ("don't", "i'm", ...) can still match.
NarendraModi_clean <- tm_map(NarendraModi_clean, removeWords, stopwords("english"))
# 3. Strip punctuation and digits, then collapse the whitespace left behind.
NarendraModi_clean <- tm_map(NarendraModi_clean, removePunctuation)
NarendraModi_clean <- tm_map(NarendraModi_clean, removeNumbers)
NarendraModi_clean <- tm_map(NarendraModi_clean, stripWhitespace)

# Plot the 50 most frequent terms, most frequent first (random.order = FALSE).
# `colors` is spelled out in full rather than relying on partial matching of
# `col`, and FALSE replaces the reassignable shorthand `F`.
wordcloud(
  NarendraModi_clean,
  random.order = FALSE,
  max.words = 50,
  colors = rainbow(50),
  scale = c(3.5, 1)
)