Extracted tweets from Facebook's official Twitter handle (@facebook) and performed word cloud and sentiment analysis on them.

library("twitteR")
## Warning: package 'twitteR' was built under R version 3.5.1
library("ROAuth")
## Warning: package 'ROAuth' was built under R version 3.5.1
library(base64enc)
library(httpuv)
## Warning: package 'httpuv' was built under R version 3.5.1
library(tm)
## Warning: package 'tm' was built under R version 3.5.1
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 3.5.1
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.5.1
## Loading required package: RColorBrewer
library(wordcloud2)
library(syuzhet)
## Warning: package 'syuzhet' was built under R version 3.5.1
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.5.1
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.1
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
library(scales)
## Warning: package 'scales' was built under R version 3.5.1
## 
## Attaching package: 'scales'
## The following object is masked from 'package:syuzhet':
## 
##     rescale
library(reshape2)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:twitteR':
## 
##     id, location
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# install.packages('devtools')
# devtools::install_github("lchiffon/wordcloud2")


cred <- OAuthFactory$new(consumerKey = 'BagGgBbanzbdpPNNp8Uy6TQBP',  # Consumer Key (API Key)
                         consumerSecret = 'pFxap1Jzc1fClDQ9psLNU3RKSQ5FvS2PhJz8E2R7ix0cawPKfa',  # Consumer Secret (API Secret)
                         requestURL = 'https://api.twitter.com/oauth/request_token',
                         accessURL = 'https://api.twitter.com/oauth/access_token',
                         authURL = 'https://api.twitter.com/oauth/authorize')

save(cred, file="twitter authentication.Rdata")
load("twitter authentication.Rdata")

# Authenticate with the consumer keys and access tokens

setup_twitter_oauth("BagGgBbanzbdpPNNp8Uy6TQBP", # Consumer Key (API Key)
                    "pFxap1Jzc1fClDQ9psLNU3RKSQ5FvS2PhJz8E2R7ix0cawPKfa", #Consumer Secret (API Secret)
                    "1076425245521731584-Ev31ZLB7Cf0idVMqDI8BxiVG2SgRnu",  # Access Token
                    "ZVUw0Z0mFrX7d6sjQxuB08l48JHhmnjmlAm86G2OPG7BS")  #Access Token Secret
## [1] "Using direct authentication"
#registerTwitterOAuth(cred)
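# Hard-coding API keys in a script risks leaking them. A sketch that reads the
# credentials from environment variables instead (the variable names here are
# illustrative, assuming they have been set beforehand):
setup_twitter_oauth(Sys.getenv("TWITTER_CONSUMER_KEY"),
                    Sys.getenv("TWITTER_CONSUMER_SECRET"),
                    Sys.getenv("TWITTER_ACCESS_TOKEN"),
                    Sys.getenv("TWITTER_ACCESS_SECRET"))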

# Pull the 1000 most recent tweets (retweets included) from the @facebook timeline
Tweets <- userTimeline('facebook', n = 1000, includeRts = TRUE)
TweetsDF <- twListToDF(Tweets)  # convert the list of status objects to a data frame
dim(TweetsDF)
## [1] 1000   16
View(TweetsDF)
setwd("C:/Users/Thiru/Desktop/Assignments In Progress/Text Mining/TwitterAnalysis")

write.csv(TweetsDF, "Tweets.csv", row.names = FALSE)

getwd()
## [1] "C:/Users/Thiru/Desktop/Assignments In Progress/Text Mining/TwitterAnalysis"
# handleTweets <- searchTwitter('DataScience', n = 10000)
# Read file
facebook <- read.csv(file.choose())
str(facebook)
## 'data.frame':    1000 obs. of  16 variables:
##  $ text         : Factor w/ 1000 levels "\"Be bold and dream big.\" Last week we were at the Grace Hopper conference to hear from women in tech and cele"| __truncated__,..: 968 175 676 639 165 236 451 924 987 985 ...
##  $ favorited    : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ favoriteCount: int  0 1 1 0 1 0 0 176 115 42 ...
##  $ replyToSN    : Factor w/ 852 levels "___cjsx","__sudarshan__",..: NA 165 648 614 155 225 431 259 259 259 ...
##  $ created      : Factor w/ 988 levels "2018-08-27 19:23:07",..: 988 987 986 985 984 983 982 981 981 981 ...
##  $ truncated    : logi  FALSE TRUE TRUE FALSE FALSE TRUE ...
##  $ replyToSID   : num  NA 1.08e+18 1.08e+18 1.08e+18 1.08e+18 ...
##  $ id           : num  1.08e+18 1.08e+18 1.08e+18 1.08e+18 1.08e+18 ...
##  $ replyToUID   : num  NA 7.04e+08 1.18e+07 3.09e+08 1.01e+18 ...
##  $ statusSource : Factor w/ 3 levels "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>",..: 1 3 3 3 3 3 3 1 1 1 ...
##  $ screenName   : Factor w/ 1 level "facebook": 1 1 1 1 1 1 1 1 1 1 ...
##  $ retweetCount : int  18 0 0 1 0 1 0 44 21 12 ...
##  $ isRetweet    : logi  TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ retweeted    : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ longitude    : logi  NA NA NA NA NA NA ...
##  $ latitude     : logi  NA NA NA NA NA NA ...
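# Note: on R versions before 3.6, read.csv() defaults to stringsAsFactors = TRUE,
# which is why the text column above loads as a factor. A minimal sketch that
# keeps strings as character (facebook_chr is an illustrative name, reading the
# CSV written earlier):
facebook_chr <- read.csv("Tweets.csv", stringsAsFactors = FALSE)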
# Build Corpus and DTM/TDM
corpus <- facebook$text
corpus <- Corpus(VectorSource(corpus))
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 5
## 
## [1] RT @robjective: 2018 was a hard year full of very hard problems for our industry.  Here is some of the work we did to make ads on facebook… 
## [2] @CJMelendez_ Hi CJ. We're always trying to improve our platform. You can share your feedback regarding Page tools b… https://t.co/7akqVG0Xyo
## [3] @riker17 Hi Will. We first recommend updating to the latest version of the app or using a different browser. If you… https://t.co/mYUkcZHN8n
## [4] @prpltnkr Hi there. You can learn how to report a Page that's pretending to be you in our Help Center: https://t.co/n1CJLpv30Z. -KN         
## [5] @Chelseamarkets We're glad you were able to join us, Chelsea! -KN
# Clean the text 
corpus <- tm_map(corpus, tolower)
## Warning in tm_map.SimpleCorpus(corpus, tolower): transformation drops
## documents
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 5
## 
## [1] rt @robjective: 2018 was a hard year full of very hard problems for our industry.  here is some of the work we did to make ads on facebook… 
## [2] @cjmelendez_ hi cj. we're always trying to improve our platform. you can share your feedback regarding page tools b… https://t.co/7akqvg0xyo
## [3] @riker17 hi will. we first recommend updating to the latest version of the app or using a different browser. if you… https://t.co/myukczhn8n
## [4] @prpltnkr hi there. you can learn how to report a page that's pretending to be you in our help center: https://t.co/n1cjlpv30z. -kn         
## [5] @chelseamarkets we're glad you were able to join us, chelsea! -kn
corpus <- tm_map(corpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation
## drops documents
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 5
## 
## [1] rt robjective 2018 was a hard year full of very hard problems for our industry  here is some of the work we did to make ads on facebook…
## [2] cjmelendez hi cj were always trying to improve our platform you can share your feedback regarding page tools b… httpstco7akqvg0xyo      
## [3] riker17 hi will we first recommend updating to the latest version of the app or using a different browser if you… httpstcomyukczhn8n    
## [4] prpltnkr hi there you can learn how to report a page thats pretending to be you in our help center httpstcon1cjlpv30z kn                
## [5] chelseamarkets were glad you were able to join us chelsea kn
corpus <- tm_map(corpus, removeNumbers)
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 5
## 
## [1] rt robjective  was a hard year full of very hard problems for our industry  here is some of the work we did to make ads on facebook…
## [2] cjmelendez hi cj were always trying to improve our platform you can share your feedback regarding page tools b… httpstcoakqvgxyo    
## [3] riker hi will we first recommend updating to the latest version of the app or using a different browser if you… httpstcomyukczhnn   
## [4] prpltnkr hi there you can learn how to report a page thats pretending to be you in our help center httpstconcjlpvz kn               
## [5] chelseamarkets were glad you were able to join us chelsea kn
# Note: the result here is assigned to corpus_clean but never used downstream;
# whitespace is stripped again on the cleaned set at the end of the pipeline.
corpus_clean <- tm_map(corpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(corpus, stripWhitespace): transformation
## drops documents
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 5
## 
## [1] rt robjective  was a hard year full of very hard problems for our industry  here is some of the work we did to make ads on facebook…
## [2] cjmelendez hi cj were always trying to improve our platform you can share your feedback regarding page tools b… httpstcoakqvgxyo    
## [3] riker hi will we first recommend updating to the latest version of the app or using a different browser if you… httpstcomyukczhnn   
## [4] prpltnkr hi there you can learn how to report a page thats pretending to be you in our help center httpstconcjlpvz kn               
## [5] chelseamarkets were glad you were able to join us chelsea kn
cleanset <- tm_map(corpus, removeWords, stopwords('english'))  # drop common English stop words
## Warning in tm_map.SimpleCorpus(corpus, removeWords, stopwords("english")):
## transformation drops documents
inspect(cleanset[1:5])
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 5
## 
## [1] rt robjective    hard year full   hard problems   industry       work    make ads  facebook…                    
## [2] cjmelendez hi cj  always trying  improve  platform  can share  feedback regarding page tools b… httpstcoakqvgxyo
## [3] riker hi will  first recommend updating   latest version   app  using  different browser  … httpstcomyukczhnn   
## [4] prpltnkr hi   can learn   report  page thats pretending      help center httpstconcjlpvz kn                     
## [5] chelseamarkets  glad   able  join us chelsea kn
removeURL <- function(x) gsub('http[[:alnum:]]*','',x)
cleanset <- tm_map(cleanset, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(cleanset, content_transformer(removeURL)):
## transformation drops documents
inspect(cleanset[1:5])
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 5
## 
## [1] rt robjective    hard year full   hard problems   industry       work    make ads  facebook…    
## [2] cjmelendez hi cj  always trying  improve  platform  can share  feedback regarding page tools b… 
## [3] riker hi will  first recommend updating   latest version   app  using  different browser  …     
## [4] prpltnkr hi   can learn   report  page thats pretending      help center  kn                    
## [5] chelseamarkets  glad   able  join us chelsea kn
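# The removeURL pattern only matches "http" followed by alphanumeric characters,
# which is why the truncation ellipses ("…") survive above. A broader sketch
# that drops everything from "http" to the next whitespace:
removeURL2 <- function(x) gsub("http\\S+", "", x)
removeURL2("see https://t.co/abc123 for details")  # -> "see  for details"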
cleanset <- tm_map(cleanset, removeWords, c('facebook', 'can'))
## Warning in tm_map.SimpleCorpus(cleanset, removeWords, c("facebook",
## "can")): transformation drops documents
# The words 'facebook' and 'can' dominate the counts. 'facebook' is uninformative
# because we are mining tweets from Facebook's own handle, and 'can' is a common
# English word; it could be restored later if needed.


cleanset <- tm_map(cleanset, gsub, pattern = 'pages', replacement = 'page')
## Warning in tm_map.SimpleCorpus(cleanset, gsub, pattern = "pages",
## replacement = "page"): transformation drops documents
# Without this, the bar plot counts 'page' and 'pages' as separate words;
# they should be treated as a single term. See the safer variant sketched below.
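# A safer variant (a sketch): anchoring the pattern on word boundaries ensures a
# longer word that merely contains "pages" is left alone:
cleanset <- tm_map(cleanset, content_transformer(function(x) gsub("\\bpages\\b", "page", x)))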

inspect(cleanset[1:5])
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 5
## 
## [1] rt robjective    hard year full   hard problems   industry       work    make ads  …         
## [2] cjmelendez hi cj  always trying  improve  platform   share  feedback regarding page tools b… 
## [3] riker hi will  first recommend updating   latest version   app  using  different browser  …  
## [4] prpltnkr hi    learn   report  page thats pretending      help center  kn                    
## [5] chelseamarkets  glad   able  join us chelsea kn
cleanset <- tm_map(cleanset, stripWhitespace)
## Warning in tm_map.SimpleCorpus(cleanset, stripWhitespace): transformation
## drops documents
inspect(cleanset[1:5])
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 5
## 
## [1] rt robjective hard year full hard problems industry work make ads …                    
## [2] cjmelendez hi cj always trying improve platform share feedback regarding page tools b… 
## [3] riker hi will first recommend updating latest version app using different browser …    
## [4] prpltnkr hi learn report page thats pretending help center kn                          
## [5] chelseamarkets glad able join us chelsea kn
# Term Document Matrix:
# convert the unstructured text into a structured term-frequency matrix
tdm <- TermDocumentMatrix(cleanset)
tdm
## <<TermDocumentMatrix (terms: 2547, documents: 1000)>>
## Non-/sparse entries: 8497/2538503
## Sparsity           : 100%
## Maximal term length: 25
## Weighting          : term frequency (tf)
# The TDM has 2547 terms (rows) across 1000 documents (tweets).
# Sparsity is 100% (rounded), i.e. almost all entries are zero.
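# If the near-total sparsity is a concern, tm's removeSparseTerms() can drop the
# rarest terms before converting to a dense matrix (a sketch; sparse = 0.99
# keeps terms that appear in roughly 1% or more of the documents):
tdm_small <- removeSparseTerms(tdm, sparse = 0.99)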
tdm <- as.matrix(tdm)
tdm[1:10,1:20]
##             Docs
## Terms        1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
##   ads        1 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
##   full       1 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
##   hard       2 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
##   industry   1 0 0 0 0 0 0 0 0  0  0  0  1  0  0  0  0  0  0  0
##   make       1 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
##   problems   1 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
##   robjective 1 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
##   work       1 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
##   year       1 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
##   always     0 1 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
# Bar Plot 

w <- rowSums(tdm)        # total count of each word across all tweets
w <- subset(w, w >= 25)  # keep words used at least 25 times
barplot(w, las = 2, col = rainbow(50))

# The word 'account' has the highest frequency, suggesting that Facebook's
# tweets are heavily concerned with users' accounts.
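# To verify the ranking directly (a sketch), list the most frequent terms:
head(sort(w, decreasing = TRUE), 10)  # top ten words and their counts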

# Word Cloud :

w <- sort(rowSums(tdm), decreasing = TRUE) # Sort words in decreasing order.
set.seed(123)
wordcloud(words = names(w), freq = w, 
          max.words = 250,random.order = F,
          min.freq =  3, 
          colors = brewer.pal(8, 'Dark2'),
          scale = c(5,0.3),
          rot.per = 0.6)

w <- data.frame(names(w),w)
colnames(w) <- c('word','freq')
wordcloud2(w, size = 0.5, shape = 'triangle', rotateRatio = 0.5, minSize = 1)
# Letter cloud: draw the cloud in the shape of the letter "F"
letterCloud(w, word = "F", size = 1)
# Sentiment Analysis for tweets:


# install.packages("syuzhet")

# Read File 
fbdata <- read.csv(file.choose(), header = TRUE)
tweets <- as.character(fbdata$text)
class(tweets)
## [1] "character"
# Obtain Sentiment scores 
s <- get_nrc_sentiment(tweets)
## Warning: package 'bindrcpp' was built under R version 3.5.1
head(s)
##   anger anticipation disgust fear joy sadness surprise trust negative
## 1     0            0       0    0   0       0        0     0        0
## 2     0            2       0    0   2       0        0     2        0
## 3     0            0       0    0   0       0        0     1        0
## 4     1            0       0    0   0       0        0     1        1
## 5     0            1       0    0   1       0        0     0        0
## 6     0            0       0    0   0       0        0     1        0
##   positive
## 1        1
## 2        2
## 3        1
## 4        2
## 5        2
## 6        1
tweets[4]
## [1] "@prpltnkr Hi there. You can learn how to report a Page that's pretending to be you in our Help Center: https://t.co/n1CJLpv30Z. -KN"
# "@prpltnkr Hi there. You can learn how to report a #Page that's 
# pretending to be you in our Help Center: https://t.co/n1CJLpv30Z. -KN
# the above tweet has value 1 for anger, value 1 for Negative 
# and value 2 for positive which reinstates that it has a mixture of 
# all three emotions in the above statement.
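# To see which tweets sit at the emotional extremes (a sketch), index the scores
# back into the original text:
tweets[which.max(s$positive)]  # tweet with the highest positive score
tweets[which.max(s$negative)]  # tweet with the highest negative score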
get_nrc_sentiment('pretending')
##   anger anticipation disgust fear joy sadness surprise trust negative
## 1     1            0       0    0   0       0        0     0        1
##   positive
## 1        0
# 'pretending' scores 1 for anger and 1 for negative
get_nrc_sentiment('can learn') #1 for positive
##   anger anticipation disgust fear joy sadness surprise trust negative
## 1     0            0       0    0   0       0        0     0        0
##   positive
## 1        1
# barplot 

barplot(colSums(s), las = 2, col = rainbow(10),  # las must be an integer in 0:3
        ylab = 'Count', main = 'Sentiment scores for Facebook Tweets')
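# Expressing the totals as percentages (a sketch) makes the emotion mix easier
# to compare across datasets:
round(100 * colSums(s) / sum(colSums(s)), 1)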