###################
## Load Packages ##
###################
library(twitteR)
library(tm)
## Loading required package: NLP
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
library(syuzhet)
# Go to https://apps.twitter.com and get the API credentials listed below.
# Replace each placeholder with the value issued for your app before running;
# do not commit real keys to version control.
api_key <- 'XXXXXXXXXX'
api_secret_key <- 'XXXXXXXX'
access_token <- 'XXXXXXXXXXXX'
access_token_secret <- 'XXXXXXXXXXXX'
# Authenticate this R session against the Twitter API with the four values.
setup_twitter_oauth(api_key, api_secret_key, access_token, access_token_secret)
######################
## Searching Tweets ##
######################
# Request up to 2000 English-language tweets matching the search string and
# preview the first few results.
# NOTE(review): the query starts with '$'; confirm that this, rather than
# '@narendramodi' or '#narendramodi', is the intended search term.
data_tweet <- searchTwitter(
  "$narendramodi",
  n = 2000,
  lang = "en"
)
head(data_tweet)
## [[1]]
## [1] "Vedprak92928300: RT @khotikarokmodi: Lets fight for students \n\n#speakup #RRBExamDates #ActOf_MODI #5Baje5Minute #5Baje5Minutes #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F> #<U+0926><U+093F><U+0932>_<U+092E><U+0947><U+0902>_<U+092E><U+094B><U+0926><U+0940> #T…"
##
## [[2]]
## [1] "firoj_sam: RT @HansrajMeena: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\n\nHello @narendramodi ji, are you listening sir???\n\nCitizens waken up to protest against your illogical decisi…"
##
## [[3]]
## [1] "RahulKu57336331: RT @RoflGandhi203: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\nWOW.... Trending at # No1\nThis is the power of united youth. i am sure this day is going to be the game chan…"
##
## [[4]]
## [1] "HotelRajnish: RT @1030Ravi: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\n#5Baje5Minute\n#speakup \n#RojgarDo \n#rrbexamdate \nWe want justice\nWe need more vacancies\nWe want regular schedule…"
##
## [[5]]
## [1] "THAKURP48576338: RT @SunilKu62080342: #<U+0936><U+093F><U+0915><U+094D><U+0937><U+093E><U+092E><U+093F><U+0924><U+094D><U+0930>_<U+092C><U+091A><U+093E><U+0913>\nThe election resolution letter and the assurance of @narendramodi ji was a promise of 3months which…"
##
## [[6]]
## [1] "IAmJha71025080: RT @kakoligdastidar: Where are the great #COVID data enthusiasts like @amitmalviya and @SuPriyoBabul now? Kindly provide your thoughtful in…"
## Creating CSV file ##
# Convert the list of fetched tweets into a data frame, then save it as a CSV
# file without a row-name column.
dataframe_data_tweet <- twListToDF(data_tweet)
write.csv(
  dataframe_data_tweet,
  file = "C:/Users/WASIM/Documents/nmodi2_tweet.csv",
  row.names = FALSE
)
## Importing CSV ##
# Let the user pick the CSV in a file dialog, load it into nmodi, and preview
# the first rows.
nmodi <- read.csv(file = file.choose())
head(nmodi)
## text
## 1 RT @khotikarokmodi: Lets fight for students \n\n#speakup #RRBExamDates #ActOf_MODI #5Baje5Minute #5Baje5Minutes #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F> #<U+0926><U+093F><U+0932>_<U+092E><U+0947><U+0902>_<U+092E><U+094B><U+0926><U+0940> #T…
## 2 RT @HansrajMeena: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\n\nHello @narendramodi ji, are you listening sir???\n\nCitizens waken up to protest against your illogical decisi…
## 3 RT @RoflGandhi203: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\nWOW.... Trending at # No1\nThis is the power of united youth. i am sure this day is going to be the game chan…
## 4 RT @1030Ravi: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\n#5Baje5Minute\n#speakup \n#RojgarDo \n#rrbexamdate \nWe want justice\nWe need more vacancies\nWe want regular schedule…
## 5 RT @SunilKu62080342: #<U+0936><U+093F><U+0915><U+094D><U+0937><U+093E><U+092E><U+093F><U+0924><U+094D><U+0930>_<U+092C><U+091A><U+093E><U+0913>\nThe election resolution letter and the assurance of @narendramodi ji was a promise of 3months which…
## 6 RT @kakoligdastidar: Where are the great #COVID data enthusiasts like @amitmalviya and @SuPriyoBabul now? Kindly provide your thoughtful in…
## favorited favoriteCount replyToSN created truncated replyToSID
## 1 FALSE 0 <NA> 2020-09-05 12:53:00 FALSE NA
## 2 FALSE 0 <NA> 2020-09-05 12:53:00 FALSE NA
## 3 FALSE 0 <NA> 2020-09-05 12:52:59 FALSE NA
## 4 FALSE 0 <NA> 2020-09-05 12:52:59 FALSE NA
## 5 FALSE 0 <NA> 2020-09-05 12:52:59 FALSE NA
## 6 FALSE 0 <NA> 2020-09-05 12:52:59 FALSE NA
## id replyToUID
## 1 1.302228e+18 NA
## 2 1.302228e+18 NA
## 3 1.302228e+18 NA
## 4 1.302228e+18 NA
## 5 1.302228e+18 NA
## 6 1.302228e+18 NA
## statusSource
## 1 <a href="https://mobile.twitter.com" rel="nofollow">Twitter Web App</a>
## 2 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
## 3 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
## 4 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
## 5 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
## 6 <a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
## screenName retweetCount isRetweet retweeted longitude latitude
## 1 Vedprak92928300 83 TRUE FALSE NA NA
## 2 firoj_sam 501 TRUE FALSE NA NA
## 3 RahulKu57336331 2290 TRUE FALSE NA NA
## 4 HotelRajnish 239 TRUE FALSE NA NA
## 5 THAKURP48576338 71 TRUE FALSE NA NA
## 6 IAmJha71025080 360 TRUE FALSE NA NA
# Print a compact summary of nmodi: each column's name, type and first values.
str(nmodi)
## 'data.frame': 2000 obs. of 16 variables:
## $ text : Factor w/ 734 levels "# speakup for teachers\n#HappyTeachersDay \n#SpeakUpForSSCRailwaysStudents \n@myogiadityanath \n@narendramodi \n@",..: 373 320 563 143 676 363 476 526 375 476 ...
## $ favorited : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ favoriteCount: int 0 0 0 0 0 0 0 0 0 0 ...
## $ replyToSN : Factor w/ 53 levels "_keshavagarwal_",..: NA NA NA NA NA NA NA NA NA NA ...
## $ created : Factor w/ 233 levels "2020-09-05 12:49:08",..: 233 233 232 232 232 232 232 232 232 232 ...
## $ truncated : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ replyToSID : num NA NA NA NA NA NA NA NA NA NA ...
## $ id : num 1.3e+18 1.3e+18 1.3e+18 1.3e+18 1.3e+18 ...
## $ replyToUID : num NA NA NA NA NA NA NA NA NA NA ...
## $ statusSource : Factor w/ 7 levels "","<a href=\"http://twitter.com/#!/download/ipad\" rel=\"nofollow\">Twitter for iPad</a>",..: 7 3 3 3 3 3 3 3 3 3 ...
## $ screenName : Factor w/ 1039 levels "__harsh_singh__",..: 976 313 659 358 955 365 823 660 721 104 ...
## $ retweetCount : int 83 501 2290 239 71 360 287 802 612 287 ...
## $ isRetweet : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ retweeted : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ longitude : logi NA NA NA NA NA NA ...
## $ latitude : logi NA NA NA NA NA NA ...
## Create corpus ##
# Convert the tweet text to UTF-8 and wrap it in a tm corpus (one document per
# tweet), then display the first four documents.
nmodi_corpus <- Corpus(VectorSource(iconv(nmodi$text, to = "utf-8")))
inspect(nmodi_corpus[seq_len(4)])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 4
##
## [1] RT @khotikarokmodi: Lets fight for students \n\n#speakup #RRBExamDates #ActOf_MODI #5Baje5Minute #5Baje5Minutes #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F> #<U+0926><U+093F><U+0932>_<U+092E><U+0947><U+0902>_<U+092E><U+094B><U+0926><U+0940> #Tâ\200¦
## [2] RT @HansrajMeena: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\n\nHello @narendramodi ji, are you listening sir???\n\nCitizens waken up to protest against your illogical decisiâ\200¦
## [3] RT @RoflGandhi203: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\nWOW.... Trending at # No1\nThis is the power of united youth. i am sure this day is going to be the game chanâ\200¦
## [4] RT @1030Ravi: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\n#5Baje5Minute\n#speakup \n#RojgarDo \n#rrbexamdate \nWe want justice\nWe need more vacancies\nWe want regular scheduleâ\200¦
## Cleaning Dataset ##
# Lower-case every document. The result has to be written back to nmodi_corpus:
# it was previously stored in an unused variable, so the corpus stayed
# mixed-case and the later removeWords() steps (which match case-sensitively)
# left capitalised words such as "This" and "We" in place.
nmodi_corpus <- tm_map(nmodi_corpus, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(nmodi_corpus, tolower): transformation drops
## documents
# Display the first four documents of nmodi_corpus.
inspect(nmodi_corpus[1:4])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 4
##
## [1] RT @khotikarokmodi: Lets fight for students \n\n#speakup #RRBExamDates #ActOf_MODI #5Baje5Minute #5Baje5Minutes #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F> #<U+0926><U+093F><U+0932>_<U+092E><U+0947><U+0902>_<U+092E><U+094B><U+0926><U+0940> #Tâ\200¦
## [2] RT @HansrajMeena: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\n\nHello @narendramodi ji, are you listening sir???\n\nCitizens waken up to protest against your illogical decisiâ\200¦
## [3] RT @RoflGandhi203: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\nWOW.... Trending at # No1\nThis is the power of united youth. i am sure this day is going to be the game chanâ\200¦
## [4] RT @1030Ravi: #5<U+092C><U+091C><U+0947>5<U+092E><U+093F><U+0928><U+093F><U+091F>\n#5Baje5Minute\n#speakup \n#RojgarDo \n#rrbexamdate \nWe want justice\nWe need more vacancies\nWe want regular scheduleâ\200¦
# Strip punctuation characters from every document, then display the first
# four documents.
nmodi_corpus <- tm_map(nmodi_corpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(nmodi_corpus, removePunctuation): transformation
## drops documents
inspect(nmodi_corpus[seq_len(4)])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 4
##
## [1] RT khotikarokmodi Lets fight for students \n\nspeakup RRBExamDates ActOfMODI 5Baje5Minute 5Baje5Minutes 5U092CU091CU09475U092EU093FU0928U093FU091F U0926U093FU0932U092EU0947U0902U092EU094BU0926U0940 Tâ\200¦
## [2] RT HansrajMeena 5U092CU091CU09475U092EU093FU0928U093FU091F\n\nHello narendramodi ji are you listening sir\n\nCitizens waken up to protest against your illogical decisiâ\200¦
## [3] RT RoflGandhi203 5U092CU091CU09475U092EU093FU0928U093FU091F\nWOW Trending at No1\nThis is the power of united youth i am sure this day is going to be the game chanâ\200¦
## [4] RT 1030Ravi 5U092CU091CU09475U092EU093FU0928U093FU091F\n5Baje5Minute\nspeakup \nRojgarDo \nrrbexamdate \nWe want justice\nWe need more vacancies\nWe want regular scheduleâ\200¦
# Strip digits from every document, then display the first four documents.
nmodi_corpus <- tm_map(nmodi_corpus, removeNumbers)
## Warning in tm_map.SimpleCorpus(nmodi_corpus, removeNumbers): transformation
## drops documents
inspect(nmodi_corpus[seq_len(4)])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 4
##
## [1] RT khotikarokmodi Lets fight for students \n\nspeakup RRBExamDates ActOfMODI BajeMinute BajeMinutes UCUCUUEUFUUFUF UUFUUEUUUEUBUU Tâ\200¦
## [2] RT HansrajMeena UCUCUUEUFUUFUF\n\nHello narendramodi ji are you listening sir\n\nCitizens waken up to protest against your illogical decisiâ\200¦
## [3] RT RoflGandhi UCUCUUEUFUUFUF\nWOW Trending at No\nThis is the power of united youth i am sure this day is going to be the game chanâ\200¦
## [4] RT Ravi UCUCUUEUFUUFUF\nBajeMinute\nspeakup \nRojgarDo \nrrbexamdate \nWe want justice\nWe need more vacancies\nWe want regular scheduleâ\200¦
# Drop the English stop words supplied by tm from every document, then display
# the first four documents.
nmodi_corpus <- tm_map(
  nmodi_corpus,
  removeWords,
  stopwords("english")
)
## Warning in tm_map.SimpleCorpus(nmodi_corpus, removeWords, stopwords("english")):
## transformation drops documents
inspect(nmodi_corpus[seq_len(4)])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 4
##
## [1] RT khotikarokmodi Lets fight students \n\nspeakup RRBExamDates ActOfMODI BajeMinute BajeMinutes UCUCUUEUFUUFUF UUFUUEUUUEUBUU Tâ\200¦
## [2] RT HansrajMeena UCUCUUEUFUUFUF\n\nHello narendramodi ji listening sir\n\nCitizens waken protest illogical decisiâ\200¦
## [3] RT RoflGandhi UCUCUUEUFUUFUF\nWOW Trending No\nThis power united youth sure day going game chanâ\200¦
## [4] RT Ravi UCUCUUEUFUUFUF\nBajeMinute\nspeakup \nRojgarDo \nrrbexamdate \nWe want justice\nWe need vacancies\nWe want regular scheduleâ\200¦
# Remove URLs from text.
#
# x: character vector of text to clean.
# Returns: x with every run of non-whitespace characters that begins with
#   "http" deleted (surrounding whitespace is left untouched).
#
# The previous pattern 'http[[//:alnum]]*' was a malformed bracket expression:
# it only matched "http" followed by one of the characters [ / : a l n u m, so
# links such as "https://..." were never removed. Matching up to the next
# whitespace also catches links whose punctuation was already stripped
# (e.g. "httpstcoabc").
pull_out_URL <- function(x) {
  gsub("http[^[:space:]]*", "", x)
}
# Apply the URL-removal helper to every document.
nmodi_corpus <- tm_map(
  nmodi_corpus,
  content_transformer(pull_out_URL)
)
## Warning in tm_map.SimpleCorpus(nmodi_corpus, content_transformer(pull_out_URL)):
## transformation drops documents
# Collapse runs of whitespace, then display the first four documents.
nmodi_corpus <- tm_map(nmodi_corpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(nmodi_corpus, stripWhitespace): transformation
## drops documents
inspect(nmodi_corpus[seq_len(4)])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 4
##
## [1] RT khotikarokmodi Lets fight students speakup RRBExamDates ActOfMODI BajeMinute BajeMinutes UCUCUUEUFUUFUF UUFUUEUUUEUBUU Tâ\200¦
## [2] RT HansrajMeena UCUCUUEUFUUFUF Hello narendramodi ji listening sir Citizens waken protest illogical decisiâ\200¦
## [3] RT RoflGandhi UCUCUUEUFUUFUF WOW Trending No This power united youth sure day going game chanâ\200¦
## [4] RT Ravi UCUCUUEUFUUFUF BajeMinute speakup RojgarDo rrbexamdate We want justice We need vacancies We want regular scheduleâ\200¦
# Drop a hand-picked list of extra words from every document.
nmodi_corpus <- tm_map(
  nmodi_corpus,
  removeWords,
  c(
    "narendramodi", "narendramodiin", "shri", "just", "twitter",
    "retweets", "will", "this", "anymore"
  )
)
## Warning in tm_map.SimpleCorpus(nmodi_corpus, removeWords, c("narendramodi", :
## transformation drops documents
## Formation of term document matrix ##
# Build the term-document matrix (terms in rows, documents in columns).
# `wordLengths = c(1, Inf)` keeps terms of every length. The option was
# previously spelled `minWordLength`, which is not a control option tm
# recognises, so it was ignored and tm's default minimum length applied.
nmodi_tdm <- TermDocumentMatrix(
  nmodi_corpus,
  control = list(wordLengths = c(1, Inf))
)
# Convert to a plain dense matrix.
nmodi_tdm <- as.matrix(nmodi_tdm)
# Show the first 8 terms across the first 10 documents.
nmodi_tdm[1:8, 1:10]
## Docs
## Terms 1 2 3 4 5 6 7 8 9 10
## actofmodi 1 0 0 0 0 0 0 0 1 0
## bajeminute 1 0 0 1 0 0 0 0 1 0
## bajeminutes 1 0 0 0 0 0 0 0 1 0
## fight 1 0 0 0 0 0 0 0 0 0
## khotikarokmodi 1 0 0 0 0 0 0 0 1 0
## lets 1 0 0 0 0 0 0 0 0 0
## rrbexamdates 1 0 0 0 0 0 0 0 1 0
## speakup 1 0 0 1 0 0 0 0 1 0
## Word count and finding frequently used words ##
# Total occurrences of each term across all documents (one row per term), with
# a preview of the first six totals.
wordcount <- rowSums(nmodi_tdm)
head(wordcount, n = 6L)
## actofmodi bajeminute bajeminutes fight khotikarokmodi
## 105 212 159 270 103
## lets
## 49
# Keep only the terms that occur more than 80 times and print them.
wordcount <- wordcount[which(wordcount > 80)]
print(wordcount)
## actofmodi bajeminute bajeminutes fight khotikarokmodi
## 105 212 159 270 103
## rrbexamdates speakup students ucucuueufuufuf uufuueuuueubuu
## 339 265 184 889 168
## citizens decisi… hansrajmeena hello illogical
## 109 86 133 97 91
## listening protest sir waken chan…
## 95 95 140 95 135
## day game going power roflgandhi
## 157 146 279 211 134
## sure this trending united wow
## 142 160 179 142 142
## youth need rrbexamdate want in…
## 282 199 109 256 119
## india … airaaaofficial honble piyushgoyal
## 113 86 143 189 171
## rail shivagopalmish vkyadava pmoindia better
## 191 87 84 191 83
## future raise voice apprentices auspicious
## 302 134 138 122 108
## blessings occasion nation godi huge
## 110 104 157 147 132
## keep media unemployed wants youths
## 138 272 153 149 146
## job anandgu engaged speakupthere your
## 86 116 119 117 83
## exam timely for faug right
## 93 134 85 85 97
## student waiting
## 83 133
## Visualisation of frequently used words ##
# Bar chart of the retained term counts; las = 2 turns the axis labels
# perpendicular to the axis so the term names stay readable.
barplot(
  wordcount,
  col = rainbow(40),
  las = 2
)
## Visualization of Public Opinion about Modi ##
# Load a tweet CSV chosen in a file dialog. `TRUE` is spelled out because `T`
# is an ordinary variable that can be reassigned.
na_modi <- read.csv(file.choose(), header = TRUE)
# Keep only the text column, converted to UTF-8.
na_modi <- iconv(na_modi$text, to = "utf-8")
# Score each text against the NRC lexicon; the result has one row per text and
# one column per emotion/polarity category.
Opinion <- get_nrc_sentiment(na_modi, language = "english")
## Warning: `filter_()` is deprecated as of dplyr 0.7.0.
## Please use `filter()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Preview the first rows of the sentiment scores stored in Opinion.
head(Opinion)
## anger anticipation disgust fear joy sadness surprise trust negative positive
## 1 1 0 0 1 0 0 0 0 1 0
## 2 0 0 0 0 0 0 0 1 1 1
## 3 1 1 0 1 1 0 1 1 0 2
## 4 0 0 0 0 0 0 0 1 0 1
## 5 0 1 0 0 1 0 0 2 0 2
## 6 0 0 0 0 0 0 0 2 0 2
# Bar chart of the column totals of Opinion, one bar per sentiment category.
barplot(
  colSums(Opinion),
  main = "Public views on PM Modi",
  ylab = "count",
  col = rainbow(10),
  las = 2
)