R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document is generated that includes both the content and the output of any embedded R code chunks within the document. You can embed R code chunks like the ones below.

Installing the necessary R packages

#install.packages("twitteR")
#install.packages("ROAuth")
#install.packages("RCurl")

library(twitteR)
## Warning: package 'twitteR' was built under R version 3.4.4
library(ROAuth)
## Warning: package 'ROAuth' was built under R version 3.4.4
library(RCurl)
## Loading required package: bitops

Twitter Authentication R - Script:

## [1] "Using direct authentication"

Extracting & cleaning tweets R - Script:

## Extract tweets: ask for up to 300 tweets matching "NarendraModi".
## Assumes the Twitter authentication step has already been run in this session.
NarendraModi.tweets <- searchTwitter("NarendraModi", n = 300)

## Convert to a data frame: each status object becomes a one-row data frame
## and rbind() stacks them.
df <- do.call("rbind", lapply(NarendraModi.tweets, as.data.frame))

## Drop every character that has no ASCII representation (sub = "" deletes it).
## iconv() is vectorised, so it is applied to the whole column at once; unlike
## the earlier sapply() wrapper this does not attach the raw text as names.
df$text <- iconv(df$text, "latin1", "ASCII", sub = "")

## Remove URLs. The match runs from the scheme up to the next whitespace, so
## the whole link (host, path and query) is removed and nothing after it is
## touched. The previous greedy "(.*)[.][a-z]+" pattern stopped at the last
## ".letters" in the tweet: it left the path of short links behind and could
## swallow ordinary text between two links.
df$text <- gsub("(f|ht)tps?://\\S+", "", df$text)

## NOTE(review): `sample` shares its name with base::sample(); the name is kept
## here in case later code refers to it.
sample <- df$text

Word cloud R - Script:

### lapply() returns a list (here: one single-row data frame per tweet), which
### rbind() stacks into one data frame. Note that this rebuilds `df` from the
### raw status objects, replacing any `df` created earlier in the session.
df <- do.call("rbind", lapply(NarendraModi.tweets, as.data.frame))
### vapply() returns a vector and, unlike sapply(), guarantees a character
### result even for empty input. Each tweet has its non-ASCII characters
### dropped; for character input the original text is kept as element name.
### (The earlier sapply(..., x$getText()) assignment was removed: its result
### was overwritten here without ever being used.)
NarendraModi_text <- vapply(
  df$text,
  function(row) iconv(row, "latin1", "ASCII", sub = ""),
  character(1)
)
str(NarendraModi_text) ## gives the summary/internal structure of an R object
##  Named chr [1:300] "RT @rameshbp: @narendramodi Sir, Need your intervention. Please do not turn a blind eye to the crisis. It is a "| __truncated__ ...
##  - attr(*, "names")= chr [1:300] "RT @rameshbp: @narendramodi Sir, Need your intervention. Please do not turn a blind eye to the crisis. It is a "| __truncated__ "RT @WithRavishJi_: <U+091C><U+092C> <U+0909><U+0924><U+094D><U+0924><U+0930><U+093E><U+0916><U+0902><U+0921> <U"| __truncated__ "RT @SuPriyoBabul: The Goons from Minority community came, slaughtered and was given a safe haven to escape.. Th"| __truncated__ "RT @vdsharmabjp: <U+090F><U+0915> <U+0915><U+0926><U+092E> <U+0938><U+094D><U+0935><U+091A><U+094D><U+091B><U+0"| __truncated__ ...

tm: text mining

### Load the tm (text mining) package
library("tm")
## Warning: package 'tm' was built under R version 3.4.4
## Loading required package: NLP
### Build a corpus (a collection of text documents) from the tweet texts
NarendraModi_corpus <- Corpus(x = VectorSource(x = NarendraModi_text))
### Print the corpus summary
print(NarendraModi_corpus)
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 300
### Show the content of the first document only
inspect(x = NarendraModi_corpus[1])
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 1
## 
## RT @rameshbp: @narendramodi Sir, Need your intervention. Please do not turn a blind eye to the crisis. It is a matter of livelihood for lak… 
##  RT @rameshbp: @narendramodi Sir, Need your intervention. Please do not turn a blind eye to the crisis. It is a matter of livelihood for lak

Clean text

library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.4.4
## Loading required package: RColorBrewer

# Lower-case first: stopwords("english") is all lower case, so without this
# step capitalised stop words ("The", "It", "Please") are never removed and
# case variants of the same word are counted separately in the cloud.
NarendraModi_clean <- tm_map(NarendraModi_corpus, content_transformer(tolower))
# Remove stop words while punctuation is still present, so contractions in the
# stop-word list (e.g. "don't") still match the text.
NarendraModi_clean <- tm_map(NarendraModi_clean, removeWords, stopwords("english"))
NarendraModi_clean <- tm_map(NarendraModi_clean, removePunctuation)
NarendraModi_clean <- tm_map(NarendraModi_clean, removeNumbers)
# Collapse the gaps left behind by the removals above.
NarendraModi_clean <- tm_map(NarendraModi_clean, stripWhitespace)

# Draw the 50 most frequent terms, most frequent first (random.order = FALSE).
# `colors` is spelled out in full rather than relying on partial matching of
# `col`, and FALSE replaces the reassignable shorthand `F`.
wordcloud(
  NarendraModi_clean,
  scale = c(3.5, 1),
  max.words = 50,
  random.order = FALSE,
  colors = rainbow(50)
)