Cleaning the Corpus
head(zomato)
[[1]]
[1] "IasAlok: RT @innovation_iitd: Unicorns or Unicorns in making founded by IIT Delhi Alumni\nFlipkart\nSnapdeal\nQuikr\nZomato\nRenew Power\nPolicy Bazaar\nSh…"
[[2]]
[1] "subho_roy05: @ZomatoIN @Zomato Delivered incomplete order last night trying to chat with support but nobody responds. Disgrace !!!"
[[3]]
[1] "LunchStopper: Order #lunchstopper food online #Swiggy #Zomato #365oranges"
[[4]]
[1] "preshikajain: RT @deepigoyal: Our delivery executives doing the right thing. #proud #zomato #delivery @deeppurpled https://t.co/VnmXpulCrA"
[[5]]
[1] "ramuubot: RT @deepigoyal: Our delivery executives doing the right thing. #proud #zomato #delivery @deeppurpled https://t.co/VnmXpulCrA"
[[6]]
[1] "gl0ryhunter: RT @Chingakutty: Zomato and Swiggy drivers remind me of the Domino's delivery guys who drove during \"30 minutes or free\" offer"
head(swiggy)
[[1]]
[1] "austinnoronha: Livemint: How #Swiggy became India’s fastest unicorn.\nhttps://t.co/73wQZRvqHm\n\nvia @GoogleNews"
[[2]]
[1] "LunchStopper: Order #lunchstopper food online #Swiggy #Zomato #365oranges"
[[3]]
[1] "IanMacKay_94: @CoachKav44 and I find a wing place in beautiful Oshawa ON... server says the place has unreal wings, so naturally… https://t.co/4b3sDlDZos"
[[4]]
[1] "rajeshar01: Insightful #success mantras from @Swiggy, running #Internet #Mobileapp #FoodDelivery #business facing #challenges… https://t.co/KHA4pYKgoc"
[[5]]
[1] "gl0ryhunter: RT @Chingakutty: Zomato and Swiggy drivers remind me of the Domino's delivery guys who drove during \"30 minutes or free\" offer"
[[6]]
[1] "Ankit0316: RT @anujrathi: How \u2066@swiggy_in\u2069 became India’s fastest unicorn! https://t.co/hZbDxOsvas"
# Pull the text out of every status object in both tweet collections.
# Each element is asked for its text through its own getText() method.
extractTweetText <- function(status) status$getText()
zt <- sapply(zomato, extractTweetText)
st <- sapply(swiggy, extractTweetText)
# Lower-case `x`, returning NA instead of failing when tolower() signals
# an error.
#
# x: the text to convert.
# Returns tolower(x), or NA if the conversion raised an error.
catch.error <- function(x)
{
  # The error handler supplies the NA fallback; a successful call simply
  # yields the lower-cased text.
  tryCatch(tolower(x), error = function(e) NA)
}
# Clean tweet text for sentiment analysis.
#
# tweet: a character vector of raw tweet text.
# Returns the text with links, retweet markers, hashtags, mentions,
# punctuation and digits removed, whitespace squeezed and trimmed, and
# everything lower-cased. The lower-casing goes through catch.error(), so a
# failed conversion yields NA instead of an error.
cleanTweets <- function(tweet)
{
  # Remove links. The pattern must not contain spaces between its parts
  # (otherwise it never matches a real URL), and it stops at the first
  # whitespace so the words following the link are kept.
  tweet <- gsub("(f|ht)tps?://\\S+", " ", tweet)
  # Remove retweet entities ("RT @user", "via @user"). No space may follow
  # "(RT|via)": the \\b has to sit right after the keyword to match.
  tweet <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", " ", tweet)
  # Remove all "#HashTags"
  tweet <- gsub("#\\w+", " ", tweet)
  # Remove all "@People"
  tweet <- gsub("@\\w+", " ", tweet)
  # Remove all punctuation
  tweet <- gsub("[[:punct:]]", " ", tweet)
  # Remove digits, only words matter for the analysis
  tweet <- gsub("[[:digit:]]", " ", tweet)
  # Squeeze runs of spaces/tabs into a single space
  tweet <- gsub("[ \t]{2,}", " ", tweet)
  # Trim leading and trailing whitespace (replace with nothing, not a space)
  tweet <- gsub("^\\s+|\\s+$", "", tweet)
  # Convert to lower case; NA if the conversion fails
  tweet <- catch.error(tweet)
  tweet
}
# Clean every tweet with cleanTweets(), then drop the NA results and the
# duplicates.
#
# Tweets: the collection of raw tweets to clean.
# Returns the unnamed, de-duplicated cleaned tweets.
cleanTweetsAndRemoveNAs <- function(Tweets)
{
  cleaned <- sapply(Tweets, cleanTweets)
  # Keep only the tweets whose cleaning did not end in NA
  kept <- cleaned[!is.na(cleaned)]
  # Strip the names before collapsing repeated tweets
  unique(unname(kept))
}
# Build the cleaned tweet vectors used by every later step. The visible
# script never assigns ZomatoCleaned / SwiggyCleaned, while zt / st and
# cleanTweetsAndRemoveNAs() are defined and otherwise unused.
# NOTE(review): presumably this step was dropped from the write-up - confirm.
ZomatoCleaned <- cleanTweetsAndRemoveNAs(zt)
SwiggyCleaned <- cleanTweetsAndRemoveNAs(st)
length(ZomatoCleaned)
[1] 4289
length(SwiggyCleaned)
[1] 4086
Estimating Sentiment Part-A
# Read an opinion lexicon file as a character vector of words; everything
# after a ";" on a line is treated as a comment by scan() and skipped.
readOpinionLexicon <- function(path) {
  scan(path, what = "character", comment.char = ";")
}
opinion.lexicon.pos <- readOpinionLexicon("positive-words.txt")
opinion.lexicon.neg <- readOpinionLexicon("negative-words.txt")
head(opinion.lexicon.pos)
[1] "a+" "abound" "abounds" "abundance" "abundant" "accessable"
head(opinion.lexicon.neg)
[1] "2-faced" "2-faces" "abnormal" "abolish" "abominable" "abominably"
# The positive list is the lexicon as read; the negative list is the lexicon
# followed by four extra terms added for this analysis.
pos.words <- opinion.lexicon.pos
neg.words <- append(opinion.lexicon.neg,
                    c("cancellation", "wtf", "wait", "waiting"))
# Score each sentence as (number of positive words) - (number of negative
# words) using the supplied opinion lexicons.
#
# sentences:      character vector of texts to score.
# words.positive: character vector of positive words.
# words.negative: character vector of negative words.
# .progress:      name of the plyr progress bar, forwarded to plyr::laply().
#                 plyr's names are lower case, so a character value is
#                 lower-cased before use; this keeps the old 'None' spelling
#                 working without the "cannot find progress bar" warning.
# Returns a data frame with columns `text` (the input) and `score`.
# Requires the plyr and stringr packages to be installed.
getSentimentScore <- function(sentences, words.positive, words.negative, .progress = "none")
{
  if (is.character(.progress)) {
    .progress <- tolower(.progress)
  }

  scoreSentence <- function(sentence, words.positive, words.negative) {
    # Strip digits and punctuation
    sentence <- gsub("\\d+", "", sentence)
    sentence <- gsub("[[:punct:]]", "", sentence)
    # Control characters include newlines and tabs: turn them into spaces so
    # the words on either side are not glued into one unmatched token
    sentence <- gsub("[[:cntrl:]]", " ", sentence)
    sentence <- tolower(sentence)
    # Split into words on runs of whitespace
    words <- unlist(stringr::str_split(sentence, "\\s+"))
    # Positive hits minus negative hits
    sum(words %in% words.positive) - sum(words %in% words.negative)
  }

  scores <- plyr::laply(sentences, scoreSentence, words.positive, words.negative,
                        .progress = .progress)
  # One row per sentence, paired with its score
  data.frame(text = sentences, score = scores)
}
# Score both cleaned tweet sets against the opinion lexicons.
# Warnings are no longer switched off for the whole session: the progress
# bar name is passed explicitly in the lower-case spelling plyr recognises,
# so the call itself has nothing to warn about, and later steps keep
# reporting their own warnings.
ZomatoResults <- getSentimentScore(ZomatoCleaned, words.positive = pos.words,
                                   words.negative = neg.words, .progress = "none")
SwiggyResults <- getSentimentScore(SwiggyCleaned, words.positive = pos.words,
                                   words.negative = neg.words, .progress = "none")
library(ggplot2)
library(gridExtra)
# Bar chart of how often each sentiment score occurs. Both charts share the
# same axis limits (scores -6..6, frequency 0..2500) so they can be compared
# side by side.
plotScoreDistribution <- function(results, fill_colour, plot_title) {
  ggplot(results, aes(x = score)) +
    geom_bar(stat = "count", fill = fill_colour) +
    labs(title = plot_title, x = "Scores", y = "Frequency") +
    xlim(c(-6, 6)) +
    ylim(c(0, 2500))
}
p1 <- plotScoreDistribution(ZomatoResults, "purple", "Zomato Scores")
p2 <- plotScoreDistribution(SwiggyResults, "red", "Swiggy Scores")
# Draw the two charts next to each other
grid.arrange(p1, p2, nrow = 1)

mean(ZomatoResults$score)
[1] 0.08346934
sd(ZomatoResults$score)
[1] 0.9716261
mean(SwiggyResults$score)
[1] 0.1338718
sd(SwiggyResults$score)
[1] 0.9516811
Estimating Sentiment Part-B
# Install Rstem and the archived `sentiment` package only when they are not
# already available, so re-running the script does not reinstall them.
if (!requireNamespace("Rstem", quietly = TRUE)) {
  # NOTE(review): this repository is reached over plain http - confirm it is
  # still the right source for Rstem.
  install.packages("Rstem",
                   repos = "http://www.omegahat.org/R", type = "source")
}
if (!requireNamespace("sentiment", quietly = TRUE)) {
  # Fetch the archived source tarball from CRAN over https
  devtools::install_url("https://cran.r-project.org/src/contrib/Archive/sentiment/sentiment_0.2.tar.gz")
}
# library() stops with an error if the package is missing, instead of
# returning FALSE like require()
library(sentiment)
ls("package:sentiment")
# Classify the emotion of every cleaned tweet with the "bayes" algorithm.
# As the head() output of the result shows, the result prints as a character
# matrix with seven columns (ANGER, DISGUST, FEAR, JOY, SADNESS, SURPRISE,
# BEST_FIT) and one row per document; BEST_FIT can be NA.
ZomatoClass <- classify_emotion(ZomatoCleaned, algorithm = "bayes", prior = 1.0)
SwiggyClass <- classify_emotion(SwiggyCleaned, algorithm = "bayes", prior = 1.0)
head(ZomatoClass)
ANGER DISGUST FEAR JOY
[1,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
[2,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
[3,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
[4,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
[5,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
[6,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
SADNESS SURPRISE BEST_FIT
[1,] "1.7277074477352" "2.78695866252273" NA
[2,] "1.7277074477352" "2.78695866252273" NA
[3,] "1.7277074477352" "2.78695866252273" NA
[4,] "1.7277074477352" "2.78695866252273" NA
[5,] "1.7277074477352" "2.78695866252273" NA
[6,] "1.7277074477352" "2.78695866252273" NA
head(SwiggyClass)
ANGER DISGUST FEAR JOY
[1,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
[2,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
[3,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
[4,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
[5,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
[6,] "1.46871776464786" "3.09234031207392" "2.06783599555953" "1.02547755260094"
SADNESS SURPRISE BEST_FIT
[1,] "1.7277074477352" "2.78695866252273" NA
[2,] "1.7277074477352" "2.78695866252273" NA
[3,] "1.7277074477352" "2.78695866252273" NA
[4,] "1.7277074477352" "2.78695866252273" NA
[5,] "1.7277074477352" "2.78695866252273" NA
[6,] "1.7277074477352" "2.78695866252273" NA
# Take the seventh column (BEST_FIT in the printed output) of each emotion
# result and label the tweets without a best fit as "unknown".
ZomatoEmotion <- replace(ZomatoClass[, 7], is.na(ZomatoClass[, 7]), "unknown")
SwiggyEmotion <- replace(SwiggyClass[, 7], is.na(SwiggyClass[, 7]), "unknown")
head(ZomatoEmotion, 20)
[1] "unknown" "unknown" "unknown" "unknown" "unknown" "unknown" "unknown" "joy"
[9] "sadness" "anger" "unknown" "sadness" "unknown" "unknown" "unknown" "unknown"
[17] "joy" "unknown" "joy" "unknown"
head(SwiggyEmotion, 20)
[1] "unknown" "unknown" "unknown" "unknown" "unknown" "unknown" "unknown" "unknown"
[9] "unknown" "unknown" "unknown" "unknown" "unknown" "unknown" "joy" "unknown"
[17] "joy" "unknown" "joy" "unknown"
# Classify the polarity of every cleaned tweet with the "bayes" algorithm.
# The head() output of the result shows the columns POS, NEG, POS/NEG and
# BEST_FIT, with BEST_FIT holding values such as "positive", "negative" and
# "neutral".
ZomatoClassPol <- classify_polarity(ZomatoCleaned, algorithm = "bayes")
swiggyClassPol <- classify_polarity(SwiggyCleaned, algorithm = "bayes")
head(ZomatoClassPol)
POS NEG POS/NEG BEST_FIT
[1,] "8.78232285939751" "0.445453222112551" "19.7154772340574" "positive"
[2,] "17.2265151579293" "35.1792261323723" "0.489678627184958" "negative"
[3,] "8.78232285939751" "0.445453222112551" "19.7154772340574" "positive"
[4,] "8.78232285939751" "0.445453222112551" "19.7154772340574" "positive"
[5,] "8.78232285939751" "0.445453222112551" "19.7154772340574" "positive"
[6,] "9.47547003995745" "0.445453222112551" "21.2715265477714" "positive"
head(swiggyClassPol)
POS NEG POS/NEG BEST_FIT
[1,] "8.78232285939751" "0.445453222112551" "19.7154772340574" "positive"
[2,] "8.78232285939751" "0.445453222112551" "19.7154772340574" "positive"
[3,] "9.47547003995745" "9.47547003995745" "1" "neutral"
[4,] "9.47547003995745" "0.445453222112551" "21.2715265477714" "positive"
[5,] "8.78232285939751" "0.445453222112551" "19.7154772340574" "positive"
[6,] "8.78232285939751" "0.445453222112551" "19.7154772340574" "positive"
# Keep only the best-fit polarity label (fourth column) of each result
ZomatoPol <- ZomatoClassPol[, 4]
SwiggyPol <- swiggyClassPol[, 4]
# Collect text, emotion and polarity of every tweet in one data frame
ZomatoDF <- data.frame(text = ZomatoCleaned, emotion = ZomatoEmotion,
                       polarity = ZomatoPol, stringsAsFactors = FALSE)
SwiggyDF <- data.frame(text = SwiggyCleaned, emotion = SwiggyEmotion,
                       polarity = SwiggyPol, stringsAsFactors = FALSE)
# Turn emotion into a factor whose levels run from the most to the least
# frequent emotion
ZomatoDF$emotion <- factor(
  ZomatoDF$emotion,
  levels = names(sort(table(ZomatoDF$emotion), decreasing = TRUE))
)
SwiggyDF$emotion <- factor(
  SwiggyDF$emotion,
  levels = names(sort(table(SwiggyDF$emotion), decreasing = TRUE))
)
head(ZomatoDF, 10)
head(SwiggyDF, 10)
# Bar chart of the number of tweets per emotion category.
#
# df:    data frame with an `emotion` column.
# title: plot title.
# Returns the ggplot object. The y axis is fixed to 0..4000.
plotSentiments <- function(df, title)
{
  ggplot(df, aes(x = emotion)) +
    # geom_bar() counts the rows per category by default
    geom_bar(aes(fill = emotion)) +
    # The bars are coloured through the `fill` aesthetic, so the palette has
    # to be set on the fill scale; a colour scale would be ignored here
    scale_fill_brewer(palette = "Dark2") +
    ggtitle(title) + theme(legend.position = "right") +
    ylab("Number of Tweets") +
    xlab("Emotion Categories") +
    ylim(c(0, 4000))
}
plotSentiments(ZomatoDF,"Sentiment Analysis of Tweets on Twitter about Zomato")

plotSentiments(SwiggyDF,"Sentiment Analysis of Tweets on Twitter about Swiggy")

# Bar chart of the number of tweets per polarity category.
#
# df:    data frame with a `polarity` column.
# title: plot title.
# Returns the ggplot object. The y axis is fixed to 0..4000.
plotSentiments2 <- function(df, title)
{
  ggplot(df, aes(x = polarity)) +
    # geom_bar() counts the rows per category by default
    geom_bar(aes(fill = polarity)) +
    # The bars are coloured through the `fill` aesthetic, so the palette has
    # to be set on the fill scale; a colour scale would be ignored here
    scale_fill_brewer(palette = "RdGy") +
    ggtitle(title) + theme(legend.position = "right") +
    ylab("Number of Tweets") +
    xlab("Polarity Categories") +
    ylim(c(0, 4000))
}
plotSentiments2(ZomatoDF, "Polarity Analysis of Tweets on Twitter about Zomato")
plotSentiments2(SwiggyDF, "Polarity Analysis of Tweets on Twitter about Swiggy")
# Remove English stop words and a few domain-specific filler words from
# every tweet.
#
# TweetsCleaned: character vector of cleaned tweets.
# Returns the vector with the words removed. A tweet whose processing raises
# an error or a warning is returned unchanged. Empty input is returned as is.
# Uses removeWords() and stopwords(), which must already be available; if
# they are not, building the word list fails with an error.
removeCustomeWords <- function (TweetsCleaned)
{
  # Nothing to do for empty input (1:length(x) would wrongly visit index 1)
  if (length(TweetsCleaned) == 0) {
    return(TweetsCleaned)
  }
  # Build the word list once instead of on every iteration
  wordsToDrop <- c(stopwords("english"), "care", "guys", "can",
                   "dis", "didn", "guy", "booked", "plz", "order",
                   "ordered", "get", "hey", "also")
  for (i in seq_along(TweetsCleaned))
  {
    original <- TweetsCleaned[i]
    # Best effort: keep the original tweet if removal errors or warns
    TweetsCleaned[i] <- tryCatch(
      removeWords(original, wordsToDrop),
      error = function(cond) original,
      warning = function(cond) original
    )
  }
  TweetsCleaned
}
# Draw a comparison word cloud with one group of words per emotion.
#
# sentiment_dataframe: data frame whose `emotion` column supplies the
#                      emotion categories.
# TweetsCleaned:       character vector of cleaned tweets.
# Emotion:             emotion label of each tweet, parallel to
#                      TweetsCleaned.
# Returns the value of the comparison.cloud() call; called for its plot.
# Uses Corpus(), VectorSource() and TermDocumentMatrix(), which must already
# be available, and attaches the wordcloud package.
getWordCloud <- function(sentiment_dataframe, TweetsCleaned, Emotion)
{
  # Fail early and loudly if wordcloud is missing (require() would only
  # return FALSE)
  library(wordcloud)
  emos <- levels(factor(sentiment_dataframe$emotion))
  n_emos <- length(emos)
  TweetsCleaned <- removeCustomeWords(TweetsCleaned)
  # One document per emotion: all tweets carrying that label, joined by
  # spaces. %in% never yields NA, so tweets with a missing label are left
  # out instead of being pasted in as "NA".
  emo.docs <- vapply(
    emos,
    function(emo) paste(TweetsCleaned[Emotion %in% emo], collapse = " "),
    character(1),
    USE.NAMES = FALSE
  )
  corpus <- Corpus(VectorSource(emo.docs))
  # Terms in rows, one column per emotion document
  tdm <- as.matrix(TermDocumentMatrix(corpus))
  colnames(tdm) <- emos
  suppressWarnings(comparison.cloud(tdm, colors =
    brewer.pal(n_emos, "Dark2"), scale = c(3,.5), random.order = FALSE, title.size = 1.5))
}
getWordCloud(ZomatoDF, ZomatoCleaned, ZomatoEmotion)

getWordCloud(SwiggyDF, SwiggyCleaned, SwiggyEmotion)

