require(tidyverse)
require(shiny)
Loading required package: shiny
# Load
require("tm")
require("SnowballC")
require("wordcloud")
require("RColorBrewer")
require("syuzhet")
require("ggplot2")
# Read the text file from local machine , choose file interactively
text <- read_lines("https://raw.githubusercontent.com/adamnorstrom/File/main/WikiData.csv")
# Load the data as a corpus
TextDoc <- Corpus(VectorSource(text))
TextDoc[1]
<<SimpleCorpus>>
Metadata: corpus specific: 1, document level (indexed): 0
Content: documents: 1
#Replacing "/", "@" and "|" with space
toSpace <- content_transformer(function (x , pattern ) gsub(pattern, " ", x))
TextDoc <- tm_map(TextDoc, toSpace, "/")
Warning in tm_map.SimpleCorpus(TextDoc, toSpace, "/") :
transformation drops documents
TextDoc <- tm_map(TextDoc, toSpace, "@")
Warning in tm_map.SimpleCorpus(TextDoc, toSpace, "@") :
transformation drops documents
TextDoc <- tm_map(TextDoc, toSpace, "\\|")
Warning in tm_map.SimpleCorpus(TextDoc, toSpace, "\\|") :
transformation drops documents
# Convert the text to lower case
TextDoc <- tm_map(TextDoc, content_transformer(tolower))
Warning in tm_map.SimpleCorpus(TextDoc, content_transformer(tolower)) :
transformation drops documents
# Remove numbers
TextDoc <- tm_map(TextDoc, removeNumbers)
Warning in tm_map.SimpleCorpus(TextDoc, removeNumbers) :
transformation drops documents
# Remove english common stopwords
TextDoc <- tm_map(TextDoc, removeWords, stopwords("english"))
Warning in tm_map.SimpleCorpus(TextDoc, removeWords, stopwords("english")) :
transformation drops documents
# Remove your own stop word
# specify your custom stopwords as a character vector
TextDoc <- tm_map(TextDoc, removeWords, c("s", "company", "team"))
Warning in tm_map.SimpleCorpus(TextDoc, removeWords, c("s", "company", "team")) :
transformation drops documents
# Remove punctuations
TextDoc <- tm_map(TextDoc, removePunctuation)
Warning in tm_map.SimpleCorpus(TextDoc, removePunctuation) :
transformation drops documents
# Eliminate extra white spaces
TextDoc <- tm_map(TextDoc, stripWhitespace)
Warning in tm_map.SimpleCorpus(TextDoc, stripWhitespace) :
transformation drops documents
# Text stemming - which reduces words to their root form
TextDoc <- tm_map(TextDoc, stemDocument)
Warning in tm_map.SimpleCorpus(TextDoc, stemDocument) :
transformation drops documents
# Build a term-document matrix
TextDoc_dtm <- TermDocumentMatrix(TextDoc)
dtm_m <- as.matrix(TextDoc_dtm)
# Sort by descending value of frequency
dtm_v <- sort(rowSums(dtm_m),decreasing=TRUE)
dtm_d <- data.frame(word = names(dtm_v),freq=dtm_v)
# Display the top 5 most frequent words
head(dtm_d, 5)
# Plot the most frequent words
plotmain = barplot(dtm_d[1:5,]$freq, las = 2, names.arg = dtm_d[1:5,]$word,
col ="lightgreen", main ="Top 5 most frequent words",
ylab = "Word frequencies")

server <- function(input, output) {
output$plot <- renderPlot(ggplot(dtm_d[1:5,], aes(x = yield)) + # Create object called `output$plot` with a ggplot inside it
geom_bar(bins = 5, # Add a histogram to the plot
fill = "grey", # Make the fill colour grey
data = dtm_d, # Use data from `Barley`
colour = "black") # Outline the bins in black
)
}
set.seed(1234)
wordcloud(words = dtm_d$word, freq = dtm_d$freq, min.freq = 5,
max.words=100, random.order=FALSE, rot.per=0.40,
colors=brewer.pal(8, "Dark2"))

# regular sentiment score using get_sentiment() function and method of your choice
# please note that different methods may have different scales
syuzhet_vector <- get_sentiment(text, method="syuzhet")
# see the first row of the vector
head(syuzhet_vector)
[1] 0.00 1.05 1.30 4.55 1.40 3.25
# see summary statistics of the vector
summary(syuzhet_vector)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-1.800 1.125 2.850 3.517 5.225 11.050
# bing
bing_vector <- get_sentiment(text, method="bing")
head(bing_vector)
[1] 0 1 1 -1 2 -1
summary(bing_vector)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-4.000 0.000 1.000 2.118 3.000 12.000
#affin
afinn_vector <- get_sentiment(text, method="afinn")
head(afinn_vector)
[1] 0 3 6 9 5 3
summary(afinn_vector)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-5.000 1.000 5.000 6.392 9.000 33.000
#compare the first row of each vector using sign function
rbind(
sign(head(syuzhet_vector)),
sign(head(bing_vector)),
sign(head(afinn_vector))
)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 1 1 1 1 1
[2,] 0 1 1 -1 1 -1
[3,] 0 1 1 1 1 1
# run nrc sentiment analysis to return data frame with each row classified as one of the following
# emotions, rather than a score:
# anger, anticipation, disgust, fear, joy, sadness, surprise, trust
# It also counts the number of positive and negative emotions found in each row
d<-get_nrc_sentiment(text)
Warning: `spread_()` was deprecated in tidyr 1.2.0.
Please use `spread()` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# head(d,10) - to see top 10 lines of the get_nrc_sentiment dataframe
head (d,10)
#transpose
td<-data.frame(t(d))
td
#The function rowSums computes column sums across rows for each level of a grouping variable.
td_new <- data.frame(rowSums(td[10:51]))
#Transformation and cleaning
names(td_new)[1] <- "count"
td_new <- cbind("sentiment" = rownames(td_new), td_new)
rownames(td_new) <- NULL
td_new2<-td_new[1:8,]
#Plot One - count of words associated with each sentiment
quickplot(sentiment, data=td_new2, weight=count, geom="bar", fill=sentiment, ylab="count")+ggtitle("Survey sentiments")

#Plot two - count of words associated with each sentiment, expressed as a percentage
barplot(
sort(colSums(prop.table(d[, 1:8]))),
horiz = TRUE,
cex.names = 0.7,
las = 1,
main = "Emotions in Text", xlab="Percentage"
)

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CnJlcXVpcmUodGlkeXZlcnNlKQpyZXF1aXJlKHNoaW55KQpgYGAKCmBgYHtyfQoKIyBMb2FkCnJlcXVpcmUoInRtIikKcmVxdWlyZSgiU25vd2JhbGxDIikKcmVxdWlyZSgid29yZGNsb3VkIikKcmVxdWlyZSgiUkNvbG9yQnJld2VyIikKcmVxdWlyZSgic3l1emhldCIpCnJlcXVpcmUoImdncGxvdDIiKQpgYGAKCmBgYHtyfQojIFJlYWQgdGhlIHRleHQgZmlsZSBmcm9tIGxvY2FsIG1hY2hpbmUgLCBjaG9vc2UgZmlsZSBpbnRlcmFjdGl2ZWx5CnRleHQgPC0gcmVhZF9saW5lcygiaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL2FkYW1ub3JzdHJvbS9GaWxlL21haW4vV2lraURhdGEuY3N2IikKIyBMb2FkIHRoZSBkYXRhIGFzIGEgY29ycHVzClRleHREb2MgPC0gQ29ycHVzKFZlY3RvclNvdXJjZSh0ZXh0KSkKYGBgCgpgYGB7cn0KVGV4dERvY1sxXQpgYGAKCgpgYGB7cn0KI1JlcGxhY2luZyAiLyIsICJAIiBhbmQgInwiIHdpdGggc3BhY2UKdG9TcGFjZSA8LSBjb250ZW50X3RyYW5zZm9ybWVyKGZ1bmN0aW9uICh4ICwgcGF0dGVybiApIGdzdWIocGF0dGVybiwgIiAiLCB4KSkKVGV4dERvYyA8LSB0bV9tYXAoVGV4dERvYywgdG9TcGFjZSwgIi8iKQpUZXh0RG9jIDwtIHRtX21hcChUZXh0RG9jLCB0b1NwYWNlLCAiQCIpClRleHREb2MgPC0gdG1fbWFwKFRleHREb2MsIHRvU3BhY2UsICJcXHwiKQojIENvbnZlcnQgdGhlIHRleHQgdG8gbG93ZXIgY2FzZQpUZXh0RG9jIDwtIHRtX21hcChUZXh0RG9jLCBjb250ZW50X3RyYW5zZm9ybWVyKHRvbG93ZXIpKQojIFJlbW92ZSBudW1iZXJzClRleHREb2MgPC0gdG1fbWFwKFRleHREb2MsIHJlbW92ZU51bWJlcnMpCiMgUmVtb3ZlIGVuZ2xpc2ggY29tbW9uIHN0b3B3b3JkcwpUZXh0RG9jIDwtIHRtX21hcChUZXh0RG9jLCByZW1vdmVXb3Jkcywgc3RvcHdvcmRzKCJlbmdsaXNoIikpCiMgUmVtb3ZlIHlvdXIgb3duIHN0b3Agd29yZAojIHNwZWNpZnkgeW91ciBjdXN0b20gc3RvcHdvcmRzIGFzIGEgY2hhcmFjdGVyIHZlY3RvcgpUZXh0RG9jIDwtIHRtX21hcChUZXh0RG9jLCByZW1vdmVXb3JkcywgYygicyIsICJjb21wYW55IiwgInRlYW0iKSkgCiMgUmVtb3ZlIHB1bmN0dWF0aW9ucwpUZXh0RG9jIDwtIHRtX21hcChUZXh0RG9jLCByZW1vdmVQdW5jdHVhdGlvbikKIyBFbGltaW5hdGUgZXh0cmEgd2hpdGUgc3BhY2VzClRleHREb2MgPC0gdG1fbWFwKFRleHREb2MsIHN0cmlwV2hpdGVzcGFjZSkKIyBUZXh0IHN0ZW1taW5nIC0gd2hpY2ggcmVkdWNlcyB3b3JkcyB0byB0aGVpciByb290IGZvcm0KVGV4dERvYyA8LSB0bV9tYXAoVGV4dERvYywgc3RlbURvY3VtZW50KQpgYGAKYGBge3J9CiMgQnVpbGQgYSB0ZXJtLWRvY3VtZW50IG1hdHJpeApUZXh0RG9jX2R0bSA8LSBUZXJtRG9jdW1lbnRNYXRyaXgoVGV4dERvYykKZHRtX20gPC0gYXMubWF0cml4KFRleHREb2NfZHRtKQojIFNvcnQgYnkgZGVzY2VuZGluZyB2YWx1ZSBvZiBmcmVxdWVuY3kKZHRtX3YgPC0gc29ydChyb3dTdW1zKGR0bV9tKSxkZWNyZWFzaW5nPVRSVUUpCmR0bV9kIDwtIGRhdGEuZnJhbWUod29yZCA9IG5hbWVzKGR0bV92KSxmcmVxPWR0bV92KQojIERpc3BsYXkgdGhlIHRvcCA1IG1vc3QgZnJlcXVlbnQgd29yZHMKaGVhZChkdG1fZCwgNSkKYGBgCgoKCgpgYGB7cn0KIyBQbG90IHRoZSBtb3N0IGZyZXF1ZW50IHdvcmRzCnBsb3RtYWluID0gYmFycGxvdChkdG1fZFsxOjUsXSRmcmVxLCBsYXMgPSAyLCBuYW1lcy5hcmcgPSBkdG1fZFsxOjUsXSR3b3JkLAogICAgICAgIGNvbCA9ImxpZ2h0Z3JlZW4iLCBtYWluID0iVG9wIDUgbW9zdCBmcmVxdWVudCB3b3JkcyIsCiAgICAgICAgeWxhYiA9ICJXb3JkIGZyZXF1ZW5jaWVzIikKYGBgCgoKCmBgYHtyfQpzZXJ2ZXIgPC0gZnVuY3Rpb24oaW5wdXQsIG91dHB1dCkgewogIG91dHB1dCRwbG90IDwtIHJlbmRlclBsb3QoZ2dwbG90KGR0bV9kWzE6NSxdLCBhZXMoeCA9IHlpZWxkKSkgKyAgIyBDcmVhdGUgb2JqZWN0IGNhbGxlZCBgb3V0cHV0JHBsb3RgIHdpdGggYSBnZ3Bsb3QgaW5zaWRlIGl0CiAgZ2VvbV9iYXIoYmlucyA9IDUsICAjIEFkZCBhIGhpc3RvZ3JhbSB0byB0aGUgcGxvdAogICAgZmlsbCA9ICJncmV5IiwgICMgTWFrZSB0aGUgZmlsbCBjb2xvdXIgZ3JleQogICAgZGF0YSA9IGR0bV9kLCAgIyBVc2UgZGF0YSBmcm9tIGBCYXJsZXlgCiAgICBjb2xvdXIgPSAiYmxhY2siKSAgIyBPdXRsaW5lIHRoZSBiaW5zIGluIGJsYWNrCiAgKSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKfQpgYGAKCgoKCgpgYGB7cn0Kc2V0LnNlZWQoMTIzNCkKd29yZGNsb3VkKHdvcmRzID0gZHRtX2Qkd29yZCwgZnJlcSA9IGR0bV9kJGZyZXEsIG1pbi5mcmVxID0gNSwKICAgICAgICAgIG1heC53b3Jkcz0xMDAsIHJhbmRvbS5vcmRlcj1GQUxTRSwgcm90LnBlcj0wLjQwLCAKICAgICAgICAgIGNvbG9ycz1icmV3ZXIucGFsKDgsICJEYXJrMiIpKQpgYGAKCmBgYHtyfQojIHJlZ3VsYXIgc2VudGltZW50IHNjb3JlIHVzaW5nIGdldF9zZW50aW1lbnQoKSBmdW5jdGlvbiBhbmQgbWV0aG9kIG9mIHlvdXIgY2hvaWNlCiMgcGxlYXNlIG5vdGUgdGhhdCBkaWZmZXJlbnQgbWV0aG9kcyBtYXkgaGF2ZSBkaWZmZXJlbnQgc2NhbGVzCnN5dXpoZXRfdmVjdG9yIDwtIGdldF9zZW50aW1lbnQodGV4dCwgbWV0aG9kPSJzeXV6aGV0IikKIyBzZWUgdGhlIGZpcnN0IHJvdyBvZiB0aGUgdmVjdG9yCmhlYWQoc3l1emhldF92ZWN0b3IpCiMgc2VlIHN1bW1hcnkgc3RhdGlzdGljcyBvZiB0aGUgdmVjdG9yCnN1bW1hcnkoc3l1emhldF92ZWN0b3IpCmBgYAoKYGBge3J9CiMgYmluZwpiaW5nX3ZlY3RvciA8LSBnZXRfc2VudGltZW50KHRleHQsIG1ldGhvZD0iYmluZyIpCmhlYWQoYmluZ192ZWN0b3IpCnN1bW1hcnkoYmluZ192ZWN0b3IpCiNhZmZpbgphZmlubl92ZWN0b3IgPC0gZ2V0X3NlbnRpbWVudCh0ZXh0LCBtZXRob2Q9ImFmaW5uIikKaGVhZChhZmlubl92ZWN0b3IpCnN1bW1hcnkoYWZpbm5fdmVjdG9yKQpgYGAKCmBgYHtyfQojY29tcGFyZSB0aGUgZmlyc3Qgcm93IG9mIGVhY2ggdmVjdG9yIHVzaW5nIHNpZ24gZnVuY3Rpb24KcmJpbmQoCiAgc2lnbihoZWFkKHN5dXpoZXRfdmVjdG9yKSksCiAgc2lnbihoZWFkKGJpbmdfdmVjdG9yKSksCiAgc2lnbihoZWFkKGFmaW5uX3ZlY3RvcikpCikKYGBgCgpgYGB7cn0KIyBydW4gbnJjIHNlbnRpbWVudCBhbmFseXNpcyB0byByZXR1cm4gZGF0YSBmcmFtZSB3aXRoIGVhY2ggcm93IGNsYXNzaWZpZWQgYXMgb25lIG9mIHRoZSBmb2xsb3dpbmcKIyBlbW90aW9ucywgcmF0aGVyIHRoYW4gYSBzY29yZTogCiMgYW5nZXIsIGFudGljaXBhdGlvbiwgZGlzZ3VzdCwgZmVhciwgam95LCBzYWRuZXNzLCBzdXJwcmlzZSwgdHJ1c3QgCiMgSXQgYWxzbyBjb3VudHMgdGhlIG51bWJlciBvZiBwb3NpdGl2ZSBhbmQgbmVnYXRpdmUgZW1vdGlvbnMgZm91bmQgaW4gZWFjaCByb3cKZDwtZ2V0X25yY19zZW50aW1lbnQodGV4dCkKIyBoZWFkKGQsMTApIC0gdG8gc2VlIHRvcCAxMCBsaW5lcyBvZiB0aGUgZ2V0X25yY19zZW50aW1lbnQgZGF0YWZyYW1lCmBgYAoKCmBgYHtyfQpoZWFkIChkLDEwKQpgYGAKYGBge3J9CiN0cmFuc3Bvc2UKdGQ8LWRhdGEuZnJhbWUodChkKSkKYGBgCgpgYGB7cn0KdGQKYGBgCgoKYGBge3J9CiNUaGUgZnVuY3Rpb24gcm93U3VtcyBjb21wdXRlcyBjb2x1bW4gc3VtcyBhY3Jvc3Mgcm93cyBmb3IgZWFjaCBsZXZlbCBvZiBhIGdyb3VwaW5nIHZhcmlhYmxlLgp0ZF9uZXcgPC0gZGF0YS5mcmFtZShyb3dTdW1zKHRkWzEwOjUxXSkpCmBgYAoKCmBgYHtyfQojVHJhbnNmb3JtYXRpb24gYW5kIGNsZWFuaW5nCm5hbWVzKHRkX25ldylbMV0gPC0gImNvdW50Igp0ZF9uZXcgPC0gY2JpbmQoInNlbnRpbWVudCIgPSByb3duYW1lcyh0ZF9uZXcpLCB0ZF9uZXcpCnJvd25hbWVzKHRkX25ldykgPC0gTlVMTAp0ZF9uZXcyPC10ZF9uZXdbMTo4LF0KI1Bsb3QgT25lIC0gY291bnQgb2Ygd29yZHMgYXNzb2NpYXRlZCB3aXRoIGVhY2ggc2VudGltZW50CnF1aWNrcGxvdChzZW50aW1lbnQsIGRhdGE9dGRfbmV3Miwgd2VpZ2h0PWNvdW50LCBnZW9tPSJiYXIiLCBmaWxsPXNlbnRpbWVudCwgeWxhYj0iY291bnQiKStnZ3RpdGxlKCJTdXJ2ZXkgc2VudGltZW50cyIpCmBgYAoKYGBge3J9CiNQbG90IHR3byAtIGNvdW50IG9mIHdvcmRzIGFzc29jaWF0ZWQgd2l0aCBlYWNoIHNlbnRpbWVudCwgZXhwcmVzc2VkIGFzIGEgcGVyY2VudGFnZQpiYXJwbG90KAogIHNvcnQoY29sU3Vtcyhwcm9wLnRhYmxlKGRbLCAxOjhdKSkpLCAKICBob3JpeiA9IFRSVUUsIAogIGNleC5uYW1lcyA9IDAuNywgCiAgbGFzID0gMSwgCiAgbWFpbiA9ICJFbW90aW9ucyBpbiBUZXh0IiwgeGxhYj0iUGVyY2VudGFnZSIKKQoKYGBgCgo=