Sentiment analysis on the same data.
library("twitteR")
## Warning: package 'twitteR' was built under R version 3.5.1
library("ROAuth")
## Warning: package 'ROAuth' was built under R version 3.5.1
library(base64enc)
library(httpuv)
## Warning: package 'httpuv' was built under R version 3.5.1
library(tm)
## Warning: package 'tm' was built under R version 3.5.1
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 3.5.1
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.5.1
## Loading required package: RColorBrewer
library(wordcloud2)
library(syuzhet)
## Warning: package 'syuzhet' was built under R version 3.5.1
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.5.1
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.1
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
library(scales)
## Warning: package 'scales' was built under R version 3.5.1
##
## Attaching package: 'scales'
## The following object is masked from 'package:syuzhet':
##
## rescale
library(reshape2)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# install.packages('devtools')
# devtools::install_github("lchiffon/wordcloud2")
cred <- OAuthFactory$new(consumerKey = 'XXXXXXXXXXXXXXXXXXXXXXXXX',    # Consumer Key (API Key) - redacted
                         consumerSecret = 'XXXXXXXXXXXXXXXXXXXXXXXX',  # Consumer Secret (API Secret) - redacted
                         requestURL = 'https://api.twitter.com/oauth/request_token',
                         accessURL = 'https://api.twitter.com/oauth/access_token',
                         authURL = 'https://api.twitter.com/oauth/authorize')
save(cred, file="twitter authentication.Rdata")
load("twitter authentication.Rdata")
#Access Token Secret
setup_twitter_oauth("BagGgBbanzbdpPNNp8Uy6TQBP", # Consumer Key (API Key)
"pFxap1Jzc1fClDQ9psLNU3RKSQ5FvS2PhJz8E2R7ix0cawPKfa", #Consumer Secret (API Secret)
"1076425245521731584-Ev31ZLB7Cf0idVMqDI8BxiVG2SgRnu", # Access Token
"ZVUw0Z0mFrX7d6sjQxuB08l48JHhmnjmlAm86G2OPG7BS") #Access Token Secret
## [1] "Using direct authentication"
#registerTwitterOAuth(cred)
Tweets <- userTimeline('facebook', n = 1000, includeRts = TRUE)
TweetsDF <- twListToDF(Tweets)
dim(TweetsDF)
## [1] 1000 16
View(TweetsDF)
setwd("C:/Users/Thiru/Desktop/Assignments In Progress/Text Mining/TwitterAnalysis")
write.csv(TweetsDF, "Tweets.csv",row.names = F)
getwd()
## [1] "C:/Users/Thiru/Desktop/Assignments In Progress/Text Mining/TwitterAnalysis"
# handleTweets <- searchTwitter('DataScience', n = 10000)
# Read file
facebook <- read.csv(file.choose())
str(facebook)
## 'data.frame': 1000 obs. of 16 variables:
## $ text : Factor w/ 1000 levels "\"Be bold and dream big.\" Last week we were at the Grace Hopper conference to hear from women in tech and cele"| __truncated__,..: 968 175 676 639 165 236 451 924 987 985 ...
## $ favorited : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ favoriteCount: int 0 1 1 0 1 0 0 176 115 42 ...
## $ replyToSN : Factor w/ 852 levels "___cjsx","__sudarshan__",..: NA 165 648 614 155 225 431 259 259 259 ...
## $ created : Factor w/ 988 levels "2018-08-27 19:23:07",..: 988 987 986 985 984 983 982 981 981 981 ...
## $ truncated : logi FALSE TRUE TRUE FALSE FALSE TRUE ...
## $ replyToSID : num NA 1.08e+18 1.08e+18 1.08e+18 1.08e+18 ...
## $ id : num 1.08e+18 1.08e+18 1.08e+18 1.08e+18 1.08e+18 ...
## $ replyToUID : num NA 7.04e+08 1.18e+07 3.09e+08 1.01e+18 ...
## $ statusSource : Factor w/ 3 levels "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>",..: 1 3 3 3 3 3 3 1 1 1 ...
## $ screenName : Factor w/ 1 level "facebook": 1 1 1 1 1 1 1 1 1 1 ...
## $ retweetCount : int 18 0 0 1 0 1 0 44 21 12 ...
## $ isRetweet : logi TRUE FALSE FALSE FALSE FALSE FALSE ...
## $ retweeted : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ longitude : logi NA NA NA NA NA NA ...
## $ latitude : logi NA NA NA NA NA NA ...
# Build Corpus and DTM/TDM
corpus <- facebook$text
corpus <- Corpus(VectorSource(corpus))
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] RT @robjective: 2018 was a hard year full of very hard problems for our industry. Here is some of the work we did to make ads on facebook…
## [2] @CJMelendez_ Hi CJ. We're always trying to improve our platform. You can share your feedback regarding Page tools b… https://t.co/7akqVG0Xyo
## [3] @riker17 Hi Will. We first recommend updating to the latest version of the app or using a different browser. If you… https://t.co/mYUkcZHN8n
## [4] @prpltnkr Hi there. You can learn how to report a Page that's pretending to be you in our Help Center: https://t.co/n1CJLpv30Z. -KN
## [5] @Chelseamarkets We're glad you were able to join us, Chelsea! -KN
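# Sketch (an optional safeguard, not part of the original run): raw tweets
# sometimes contain non-UTF-8 characters that can trip up later tm
# transformations; converting the encoding up front avoids that.
# This rebuilds the same corpus from the raw text.
corpus <- Corpus(VectorSource(iconv(as.character(facebook$text), to = "UTF-8", sub = "")))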
# Clean the text
corpus <- tm_map(corpus, tolower)
## Warning in tm_map.SimpleCorpus(corpus, tolower): transformation drops
## documents
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt @robjective: 2018 was a hard year full of very hard problems for our industry. here is some of the work we did to make ads on facebook…
## [2] @cjmelendez_ hi cj. we're always trying to improve our platform. you can share your feedback regarding page tools b… https://t.co/7akqvg0xyo
## [3] @riker17 hi will. we first recommend updating to the latest version of the app or using a different browser. if you… https://t.co/myukczhn8n
## [4] @prpltnkr hi there. you can learn how to report a page that's pretending to be you in our help center: https://t.co/n1cjlpv30z. -kn
## [5] @chelseamarkets we're glad you were able to join us, chelsea! -kn
corpus <- tm_map(corpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation
## drops documents
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt robjective 2018 was a hard year full of very hard problems for our industry here is some of the work we did to make ads on facebook…
## [2] cjmelendez hi cj were always trying to improve our platform you can share your feedback regarding page tools b… httpstco7akqvg0xyo
## [3] riker17 hi will we first recommend updating to the latest version of the app or using a different browser if you… httpstcomyukczhn8n
## [4] prpltnkr hi there you can learn how to report a page thats pretending to be you in our help center httpstcon1cjlpv30z kn
## [5] chelseamarkets were glad you were able to join us chelsea kn
corpus <- tm_map(corpus, removeNumbers)
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt robjective was a hard year full of very hard problems for our industry here is some of the work we did to make ads on facebook…
## [2] cjmelendez hi cj were always trying to improve our platform you can share your feedback regarding page tools b… httpstcoakqvgxyo
## [3] riker hi will we first recommend updating to the latest version of the app or using a different browser if you… httpstcomyukczhnn
## [4] prpltnkr hi there you can learn how to report a page thats pretending to be you in our help center httpstconcjlpvz kn
## [5] chelseamarkets were glad you were able to join us chelsea kn
corpus <- tm_map(corpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(corpus, stripWhitespace): transformation
## drops documents
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt robjective was a hard year full of very hard problems for our industry here is some of the work we did to make ads on facebook…
## [2] cjmelendez hi cj were always trying to improve our platform you can share your feedback regarding page tools b… httpstcoakqvgxyo
## [3] riker hi will we first recommend updating to the latest version of the app or using a different browser if you… httpstcomyukczhnn
## [4] prpltnkr hi there you can learn how to report a page thats pretending to be you in our help center httpstconcjlpvz kn
## [5] chelseamarkets were glad you were able to join us chelsea kn
cleanset <- tm_map(corpus, removeWords, stopwords('english'))
## Warning in tm_map.SimpleCorpus(corpus, removeWords, stopwords("english")):
## transformation drops documents
inspect(cleanset[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt robjective hard year full hard problems industry work make ads facebook…
## [2] cjmelendez hi cj always trying improve platform can share feedback regarding page tools b… httpstcoakqvgxyo
## [3] riker hi will first recommend updating latest version app using different browser … httpstcomyukczhnn
## [4] prpltnkr hi can learn report page thats pretending help center httpstconcjlpvz kn
## [5] chelseamarkets glad able join us chelsea kn
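# Note: punctuation was already stripped above, so each URL has collapsed into
# a single alphanumeric token starting with "http"; the pattern below removes
# that whole token.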
removeURL <- function(x) gsub('http[[:alnum:]]*', '', x)
cleanset <- tm_map(cleanset, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(cleanset, content_transformer(removeURL)):
## transformation drops documents
inspect(cleanset[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt robjective hard year full hard problems industry work make ads facebook…
## [2] cjmelendez hi cj always trying improve platform can share feedback regarding page tools b…
## [3] riker hi will first recommend updating latest version app using different browser …
## [4] prpltnkr hi can learn report page thats pretending help center kn
## [5] chelseamarkets glad able join us chelsea kn
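# Sketch (an optional check, not part of the original run): list the most
# frequent terms before picking custom stopwords; "facebook" and "can" rank
# near the top here. 'tf' is an illustrative name.
tf <- sort(rowSums(as.matrix(TermDocumentMatrix(cleanset))), decreasing = TRUE)
head(tf, 10)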
cleanset <- tm_map(cleanset, removeWords, c('facebook','can'))
## Warning in tm_map.SimpleCorpus(cleanset, removeWords, c("facebook",
## "can")): transformation drops documents
# Since the words "facebook" and "can" occur very frequently, we remove them:
# we are mining tweets from the Facebook handle only, so "facebook" adds no
# information, and "can" is a common English word. We can add "can" back if needed.
cleanset <- tm_map(cleanset, gsub, pattern = 'pages', replacement = 'page')
## Warning in tm_map.SimpleCorpus(cleanset, gsub, pattern = "pages",
## replacement = "page"): transformation drops documents
# Without this, the barplot counts "page" and "pages" as separate words;
# they should be counted as one.
inspect(cleanset[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt robjective hard year full hard problems industry work make ads …
## [2] cjmelendez hi cj always trying improve platform share feedback regarding page tools b…
## [3] riker hi will first recommend updating latest version app using different browser …
## [4] prpltnkr hi learn report page thats pretending help center kn
## [5] chelseamarkets glad able join us chelsea kn
cleanset <- tm_map(cleanset, stripWhitespace)
## Warning in tm_map.SimpleCorpus(cleanset, stripWhitespace): transformation
## drops documents
inspect(cleanset[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt robjective hard year full hard problems industry work make ads …
## [2] cjmelendez hi cj always trying improve platform share feedback regarding page tools b…
## [3] riker hi will first recommend updating latest version app using different browser …
## [4] prpltnkr hi learn report page thats pretending help center kn
## [5] chelseamarkets glad able join us chelsea kn
#Term Document Matrix :
# Convert the unstructured data to structured data :
tdm <- TermDocumentMatrix(cleanset)
tdm
## <<TermDocumentMatrix (terms: 2547, documents: 1000)>>
## Non-/sparse entries: 8497/2538503
## Sparsity : 100%
## Maximal term length: 25
## Weighting : term frequency (tf)
# The TDM has 2547 terms (distinct words) across 1000 documents (tweets).
# Sparsity is ~100%, i.e. the vast majority of entries are zero.
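# Optional (a sketch, not used below): removeSparseTerms() shrinks a sparse
# TDM by keeping only terms that appear in at least (1 - sparse) of the
# documents. 'tdm_small' is an illustrative name.
tdm_small <- removeSparseTerms(tdm, sparse = 0.99)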
tdm <- as.matrix(tdm)
tdm[1:10,1:20]
## Docs
## Terms 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## ads 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## full 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## hard 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## industry 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## make 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## problems 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## robjective 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## work 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## year 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## always 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Bar Plot
w <- rowSums(tdm)       # number of times each word is used across all tweets
w <- subset(w, w >= 25) # keep words used at least 25 times
barplot(w, las = 2, col = rainbow(50))

# The word "account" has the highest frequency, which suggests that
# Facebook's tweets are mostly concerned with people's accounts.
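# To read exact counts rather than eyeballing the bars (a quick sketch):
head(sort(w, decreasing = TRUE), 5)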
# Word Cloud :
w <- sort(rowSums(tdm), decreasing = TRUE) # Sort words in decreasing order.
set.seed(123)
wordcloud(words = names(w), freq = w,
          max.words = 250, random.order = FALSE,
          min.freq = 3,
          colors = brewer.pal(8, 'Dark2'),
          scale = c(5, 0.3),
          rot.per = 0.6)

w <- data.frame(names(w),w)
colnames(w) <- c('word','freq')
wordcloud2(w,size = 0.5, shape = 'triangle', rotateRatio = 0.5, minSize = 1)
# Letter cloud
letterCloud(w, word = "F", size = 1)
# Sentiment Analysis for tweets:
# install.packages("syuzhet")
# Read File
fbdata <- read.csv(file.choose(), header = TRUE)
tweets <- as.character(fbdata$text)
class(tweets)
## [1] "character"
# Obtain Sentiment scores
s <- get_nrc_sentiment(tweets)
## Warning: package 'bindrcpp' was built under R version 3.5.1
head(s)
## anger anticipation disgust fear joy sadness surprise trust negative
## 1 0 0 0 0 0 0 0 0 0
## 2 0 2 0 0 2 0 0 2 0
## 3 0 0 0 0 0 0 0 1 0
## 4 1 0 0 0 0 0 0 1 1
## 5 0 1 0 0 1 0 0 0 0
## 6 0 0 0 0 0 0 0 1 0
## positive
## 1 1
## 2 2
## 3 1
## 4 2
## 5 2
## 6 1
tweets[4]
## [1] "@prpltnkr Hi there. You can learn how to report a Page that's pretending to be you in our Help Center: https://t.co/n1CJLpv30Z. -KN"
# "@prpltnkr Hi there. You can learn how to report a #Page that's
# pretending to be you in our Help Center: https://t.co/n1CJLpv30Z. -KN
# the above tweet has value 1 for anger, value 1 for Negative
# and value 2 for positive which reinstates that it has a mixture of
# all three emotions in the above statement.
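# For a single signed polarity score per tweet, syuzhet also provides
# get_sentiment() (a sketch; the default "syuzhet" method uses its own lexicon):
head(get_sentiment(tweets, method = "syuzhet"))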
get_nrc_sentiment('pretending')
## anger anticipation disgust fear joy sadness surprise trust negative
## 1 1 0 0 0 0 0 0 0 1
## positive
## 1 0
# "pretending" scores 1 for anger and 1 for negative.
get_nrc_sentiment('can learn') # scores 1 for positive
## anger anticipation disgust fear joy sadness surprise trust negative
## 1 0 0 0 0 0 0 0 0 0
## positive
## 1 1
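# Caveat (a sketch): get_nrc_sentiment() is a plain lexicon lookup and does
# not handle negation, so "not good" still scores as positive.
get_nrc_sentiment('not good')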
# barplot
barplot(colSums(s), las = 2, col = rainbow(10),
        ylab = 'Count', main = 'Sentiment scores for Facebook Tweets')

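# Optional (a sketch; 'sentiment_totals' is an illustrative name): the same
# totals as a ggplot2 bar chart, since ggplot2 is already loaded.
sentiment_totals <- data.frame(sentiment = names(s), count = colSums(s))
ggplot(sentiment_totals, aes(x = reorder(sentiment, -count), y = count)) +
  geom_col(fill = "steelblue") +
  labs(x = NULL, y = "Count", title = "Sentiment scores for Facebook Tweets")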