df <- read.csv(file = "Exit Survey 2016.csv")
df$QAll <- paste(df$Q1, df$Q2, df$Q3, df$Q4, df$Q5, sep = " ")
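# Note: paste() renders a missing answer as the literal string "NA", which would
# then flow into the text cleaning below as an ordinary token. A defensive
# variant (a sketch, assuming the question columns are named Q1..Q5 and may
# contain NAs) blanks missing answers before combining:
# q_cols <- c("Q1", "Q2", "Q3", "Q4", "Q5")
# df[q_cols] <- lapply(df[q_cols], function(x) ifelse(is.na(x), "", as.character(x)))
# df$QAll <- do.call(paste, c(df[q_cols], list(sep = " ")))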
# Sentiment Analysis
#*******************************************************************************************
library(RColorBrewer)
library(wordcloud)
library(SnowballC)
library(RCurl)
## Loading required package: bitops
library(ggplot2)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(twitteR)
library(ROAuth)
library(plyr)
##
## Attaching package: 'plyr'
## The following object is masked from 'package:twitteR':
##
## id
library(stringr)
library(base64enc)
# install.packages("topicmodels")
library(topicmodels)
library(data.table)
library(stringi)
library(devtools)
# install_github('okugami79/sentiment140')
library(sentiment)
## Loading required package: rjson
library(qdap)
## Loading required package: qdapDictionaries
## Loading required package: qdapRegex
##
## Attaching package: 'qdapRegex'
## The following object is masked from 'package:ggplot2':
##
## %+%
## Loading required package: qdapTools
##
## Attaching package: 'qdapTools'
## The following object is masked from 'package:data.table':
##
## shift
## The following object is masked from 'package:plyr':
##
## id
## The following object is masked from 'package:twitteR':
##
## id
##
## Attaching package: 'qdap'
## The following object is masked from 'package:stringr':
##
## %>%
## The following objects are masked from 'package:tm':
##
## as.DocumentTermMatrix, as.TermDocumentMatrix
## The following object is masked from 'package:NLP':
##
## ngrams
## The following object is masked from 'package:base':
##
## Filter
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:qdap':
##
## %>%
## The following object is masked from 'package:qdapTools':
##
## id
## The following object is masked from 'package:qdapRegex':
##
## explain
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Sentiment analysis to identify positive/negative responses.
# calculate.sentiment.score() does the scoring: it takes each response's text and
# the custom files of positive and negative words as inputs.
calculate.sentiment.score <- function(sentences, positive.words, negative.words, .progress = 'none')
{
  require(plyr)
  require(stringr)
  # we get a vector of sentences; plyr will handle a list or a vector as an "l" for us
  # we want a simple array of scores back, so we use "l" + "a" + "ply" = laply:
  scores <- laply(sentences, function(sentence, positive.words, negative.words)
  {
    ## clean up sentences with R's regex-driven global substitute, gsub():
    sentence <- gsub('[[:cntrl:]]', '', sentence)
    # remove retweet markers (held over from the Twitter original; harmless for survey text)
    sentence <- gsub('(RT|via)((?:\\b\\W*@\\W+)+)', '', sentence)
    # remove @-mentions
    sentence <- gsub('@\\w+', '', sentence)
    # remove punctuation
    sentence <- gsub('[[:punct:]]', '', sentence)
    # remove numbers
    sentence <- gsub('[[:digit:]]', '', sentence)
    # remove links (punctuation is already stripped, so the whole URL is one http... token)
    sentence <- gsub('http[s]?\\w+', '', sentence)
    # collapse runs of spaces/tabs to a single space, then trim leading/trailing whitespace
    sentence <- gsub('[ \t]{2,}', ' ', sentence)
    sentence <- gsub('^\\s+|\\s+$', '', sentence)
    # drop NA's (a missing response can still come through as NA)
    sentence <- sentence[!is.na(sentence)]
    # convert to lower case:
    sentence <- tolower(sentence)
    # split into words; str_split() is in the stringr package
    word.list <- str_split(sentence, '\\s+')
    # sometimes a list() is one level of hierarchy too much
    words <- unlist(word.list)
    # compare our words to the dictionaries of positive & negative terms
    negative.matches <- match(words, negative.words)
    positive.matches <- match(words, positive.words)
    # match() returns the position of the matched term or NA;
    # we just want a TRUE/FALSE:
    positive.matches <- !is.na(positive.matches)
    negative.matches <- !is.na(negative.matches)
    # and conveniently enough, TRUE/FALSE are treated as 1/0 by sum():
    score <- sum(positive.matches) - sum(negative.matches)
    return(score)
  }, positive.words, negative.words, .progress = .progress)
  scores.df <- data.frame(score = scores, text = sentences)
  return(scores.df)
}
# Read the positive and negative words from the custom files created earlier
positive <- scan("positive-words.txt", what = "character", comment.char = ";")
negative <- scan("negative-words.txt", what = "character", comment.char = ";")
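# A quick sanity check of the scorer on made-up sentences (illustrative only:
# the expected values assume "good" and "bad" appear in the positive and
# negative word files, as they do in the standard opinion lexicons):
# calculate.sentiment.score(c("good, really good", "bad experience"),
#                           positive, negative)$score
# would then return 2 and -1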
Responses_text <- as.character(df$QAll)
# calculate the sentiment score for each response
Employee_Sentiment <- calculate.sentiment.score(Responses_text, positive, negative, .progress = "none")
# responses with score > 0 are positive, score < 0 negative, and exactly 0 neutral
Employee_Sentiment$sentiment[Employee_Sentiment$score == 0] <- "Neutral"
Employee_Sentiment$sentiment[Employee_Sentiment$score < 0] <- "Negative"
Employee_Sentiment$sentiment[Employee_Sentiment$score > 0] <- "Positive"
# fix the level order explicitly so the manual fill colours map to Negative/Neutral/Positive
Employee_Sentiment$sentiment <- factor(Employee_Sentiment$sentiment, levels = c("Negative", "Neutral", "Positive"))
# check the response counts under each positive and negative score
table(Employee_Sentiment$score)
##
## -8 -6 -5 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11
## 1 1 5 17 46 104 232 283 229 185 140 89 77 48 27 24 10 6
## 12 13 14 15
## 1 1 3 1
mean(Employee_Sentiment$score)
## [1] 2.283007
median(Employee_Sentiment$score)
## [1] 2
# Display the sentiment summary of the responses analysed
summary(Employee_Sentiment$sentiment)
## Negative Neutral Positive
## 174 232 1124
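# Shares are often easier to compare across questions than raw counts; the same
# summary normalised to proportions:
# round(prop.table(table(Employee_Sentiment$sentiment)), 2)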
# Plot the sentiment summary of the responses analysed
ggplot(data = Employee_Sentiment, aes(x = score, fill = sentiment)) +
geom_bar() +
labs(title = "Sentiment Score Bar Plot", x = "Sentiment Score", y = "Survey Count") +
scale_x_continuous(breaks = seq(-8, 15, 1)) +
scale_y_continuous(breaks = seq(0, 300, 50)) +
scale_fill_manual(guide = guide_legend("Sentiment"), values = c("#DD0426","#246EB9","#04B430"))
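# To keep the figure for the report, ggsave() writes the last plot to disk
# (assumed output filename and size):
# ggsave("sentiment_score_overall.png", width = 8, height = 5)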

#############################################################################################
# Question 1
#############################################################################################
# Reuse calculate.sentiment.score() and the positive/negative word lists loaded
# above; this run scores the Question 1 responses only.
Responses_text <- as.character(df$Q1)
# calculate the sentiment score for each response
Employee_Sentiment <- calculate.sentiment.score(Responses_text, positive, negative, .progress = "none")
# responses with score > 0 are positive, score < 0 negative, and exactly 0 neutral
Employee_Sentiment$sentiment[Employee_Sentiment$score == 0] <- "Neutral"
Employee_Sentiment$sentiment[Employee_Sentiment$score < 0] <- "Negative"
Employee_Sentiment$sentiment[Employee_Sentiment$score > 0] <- "Positive"
Employee_Sentiment$sentiment <- factor(Employee_Sentiment$sentiment, levels = c("Negative", "Neutral", "Positive"))
# check the response counts under each positive and negative score
table(Employee_Sentiment$score)
##
## -5 -4 -3 -2 -1 0 1 2 3 4 5 7 8
## 3 5 21 44 213 714 381 103 25 12 7 1 1
mean(Employee_Sentiment$score)
## [1] 0.2359477
median(Employee_Sentiment$score)
## [1] 0
# Display the sentiment summary of the responses analysed
summary(Employee_Sentiment$sentiment)
## Negative Neutral Positive
## 286 714 530
# Plot the sentiment summary of the responses analysed
ggplot(data = Employee_Sentiment, aes(x = score, fill = sentiment)) +
geom_bar() +
labs(title = "Sentiment Score Bar Plot", x = "Sentiment Score", y = "Response Count") +
scale_x_continuous(breaks = seq(-5, 8, 1)) +
scale_y_continuous(breaks = seq(0, 800, 100)) +
scale_fill_manual(guide = guide_legend("Sentiment"), values = c("#DD0426","#246EB9","#04B430"))

#############################################################################################
# Question 2
#############################################################################################
# Reuse calculate.sentiment.score() and the positive/negative word lists loaded
# above; this run scores the Question 2 responses only.
Responses_text <- as.character(df$Q2)
# calculate the sentiment score for each response
Employee_Sentiment <- calculate.sentiment.score(Responses_text, positive, negative, .progress = "none")
# responses with score > 0 are positive, score < 0 negative, and exactly 0 neutral
Employee_Sentiment$sentiment[Employee_Sentiment$score == 0] <- "Neutral"
Employee_Sentiment$sentiment[Employee_Sentiment$score < 0] <- "Negative"
Employee_Sentiment$sentiment[Employee_Sentiment$score > 0] <- "Positive"
Employee_Sentiment$sentiment <- factor(Employee_Sentiment$sentiment, levels = c("Negative", "Neutral", "Positive"))
# check the response counts under each positive and negative score
table(Employee_Sentiment$score)
##
## -4 -3 -2 -1 0 1 2 3 4 5 6
## 2 2 29 130 555 431 235 95 36 13 2
mean(Employee_Sentiment$score)
## [1] 0.7875817
median(Employee_Sentiment$score)
## [1] 1
# Display the sentiment summary of the responses analysed
summary(Employee_Sentiment$sentiment)
## Negative Neutral Positive
## 163 555 812
# Plot the sentiment summary of the responses analysed
ggplot(data = Employee_Sentiment, aes(x = score, fill = sentiment)) +
geom_bar() +
labs(title = "Sentiment Score Bar Plot", x = "Sentiment Score", y = "Response Count") +
scale_x_continuous(breaks = seq(-4, 6, 1)) +
scale_y_continuous(breaks = seq(0, 600, 100)) +
scale_fill_manual(guide = guide_legend("Sentiment"), values = c("#DD0426","#246EB9","#04B430"))

#############################################################################################
# Question 3
#############################################################################################
# Reuse calculate.sentiment.score() and the positive/negative word lists loaded
# above; this run scores the Question 3 responses only.
Responses_text <- as.character(df$Q3)
# calculate the sentiment score for each response
Employee_Sentiment <- calculate.sentiment.score(Responses_text, positive, negative, .progress = "none")
# responses with score > 0 are positive, score < 0 negative, and exactly 0 neutral
Employee_Sentiment$sentiment[Employee_Sentiment$score == 0] <- "Neutral"
Employee_Sentiment$sentiment[Employee_Sentiment$score < 0] <- "Negative"
Employee_Sentiment$sentiment[Employee_Sentiment$score > 0] <- "Positive"
Employee_Sentiment$sentiment <- factor(Employee_Sentiment$sentiment, levels = c("Negative", "Neutral", "Positive"))
# check the response counts under each positive and negative score
table(Employee_Sentiment$score)
##
## -7 -4 -3 -2 -1 0 1 2 3 4 5 6 8
## 1 2 4 20 124 826 360 128 41 15 7 1 1
mean(Employee_Sentiment$score)
## [1] 0.4294118
median(Employee_Sentiment$score)
## [1] 0
# Display the sentiment summary of the responses analysed
summary(Employee_Sentiment$sentiment)
## Negative Neutral Positive
## 151 826 553
# Plot the sentiment summary of the responses analysed
ggplot(data = Employee_Sentiment, aes(x = score, fill = sentiment)) +
geom_bar() +
labs(title = "Sentiment Score Bar Plot", x = "Sentiment Score", y = "Response Count") +
scale_x_continuous(breaks = seq(-7, 8, 1)) +
scale_y_continuous(breaks = seq(0, 900, 100)) +
scale_fill_manual(guide = guide_legend("Sentiment"), values = c("#DD0426","#246EB9","#04B430"))

#############################################################################################
# Question 4
#############################################################################################
# Reuse calculate.sentiment.score() and the positive/negative word lists loaded
# above; this run scores the Question 4 responses only.
Responses_text <- as.character(df$Q4)
# calculate the sentiment score for each response
Employee_Sentiment <- calculate.sentiment.score(Responses_text, positive, negative, .progress = "none")
# responses with score > 0 are positive, score < 0 negative, and exactly 0 neutral
Employee_Sentiment$sentiment[Employee_Sentiment$score == 0] <- "Neutral"
Employee_Sentiment$sentiment[Employee_Sentiment$score < 0] <- "Negative"
Employee_Sentiment$sentiment[Employee_Sentiment$score > 0] <- "Positive"
Employee_Sentiment$sentiment <- factor(Employee_Sentiment$sentiment, levels = c("Negative", "Neutral", "Positive"))
# check the response counts under each positive and negative score
table(Employee_Sentiment$score)
##
## -4 -3 -2 -1 0 1 2 3 4 5 6
## 1 8 15 57 762 452 176 41 11 6 1
mean(Employee_Sentiment$score)
## [1] 0.5830065
median(Employee_Sentiment$score)
## [1] 0
# Display the sentiment summary of the responses analysed
summary(Employee_Sentiment$sentiment)
## Negative Neutral Positive
## 81 762 687
# Plot the sentiment summary of the responses analysed
ggplot(data = Employee_Sentiment, aes(x = score, fill = sentiment)) +
geom_bar() +
labs(title = "Sentiment Score Bar Plot", x = "Sentiment Score", y = "Response Count") +
scale_x_continuous(breaks = seq(-4, 6, 1)) +
scale_y_continuous(breaks = seq(0, 800, 100)) +
scale_fill_manual(guide = guide_legend("Sentiment"), values = c("#DD0426","#246EB9","#04B430"))

#############################################################################################
# Question 5
#############################################################################################
# Reuse calculate.sentiment.score() and the positive/negative word lists loaded
# above; this run scores the Question 5 responses only.
Responses_text <- as.character(df$Q5)
# calculate the sentiment score for each response
Employee_Sentiment <- calculate.sentiment.score(Responses_text, positive, negative, .progress = "none")
# responses with score > 0 are positive, score < 0 negative, and exactly 0 neutral
Employee_Sentiment$sentiment[Employee_Sentiment$score == 0] <- "Neutral"
Employee_Sentiment$sentiment[Employee_Sentiment$score < 0] <- "Negative"
Employee_Sentiment$sentiment[Employee_Sentiment$score > 0] <- "Positive"
Employee_Sentiment$sentiment <- factor(Employee_Sentiment$sentiment, levels = c("Negative", "Neutral", "Positive"))
# check the response counts under each positive and negative score
table(Employee_Sentiment$score)
##
## -3 -2 -1 0 1 2 3 4 5 6 7
## 1 4 31 914 310 160 68 31 9 1 1
mean(Employee_Sentiment$score)
## [1] 0.6366013
median(Employee_Sentiment$score)
## [1] 0
# Display the sentiment summary of the responses analysed
summary(Employee_Sentiment$sentiment)
## Negative Neutral Positive
## 36 914 580
# Plot the sentiment summary of the responses analysed
ggplot(data = Employee_Sentiment, aes(x = score, fill = sentiment)) +
geom_bar() +
labs(title = "Sentiment Score Bar Plot", x = "Sentiment Score", y = "Response Count") +
scale_x_continuous(breaks = seq(-3, 7, 1)) +
scale_y_continuous(breaks = seq(0, 1000, 100)) +
scale_fill_manual(guide = guide_legend("Sentiment"), values = c("#DD0426","#246EB9","#04B430"))
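# The five per-question runs above differ only in the input column. A more
# compact alternative (a sketch over the same df, scorer, and word lists)
# loops instead of repeating the pipeline:
# question_cols <- c("Q1", "Q2", "Q3", "Q4", "Q5")
# sentiment_by_question <- lapply(question_cols, function(q) {
#   res <- calculate.sentiment.score(as.character(df[[q]]), positive, negative)
#   res$sentiment <- cut(res$score, breaks = c(-Inf, -1, 0, Inf),
#                        labels = c("Negative", "Neutral", "Positive"))
#   res
# })
# names(sentiment_by_question) <- question_cols
# summary(sentiment_by_question$Q1$sentiment)   # matches the Question 1 summary above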

#############################################################################################
# Word Cloud
#############################################################################################