# Install sentimentr directly from github using devtools, as no longer hosted on CRAN
#library(devtools)
#install_github('sentimentr', 'trinker')
library(sentimentr)
library(plyr)
library(dplyr)
# Example tweets used as input for the sentiment analysis below.
# Source: https://github.com/TharinduMunasinge/Twitter-Sentiment-Analysis/blob/master/DataSet/FinalizedTest.csv
tweet1 <- "now I'm happy"
tweet2 <- "Night at the Museum, Wolverine and junk food - perfectmonday!"
tweet3 <- "I love my Kindle2. No more stacks of books to trip over on the way to the loo"
tweet4 <- "I hate revision, it's so boring! I am totally unprepared for my exam tomorrow :( Things are not looking good..."
tweet5 <- "You're awesome and I love you"
# Collect the individual tweets into one character vector, in order
tweets <- c(
  tweet1,
  tweet2,
  tweet3,
  tweet4,
  tweet5
)
# Calculate the mean sentiment score of each tweet and its result
# (positive, negative or neutral).
#
# Inputs:  `tweets`     - character vector, one element per tweet.
# Outputs: `sen_score`  - numeric vector, mean sentiment of each tweet.
#          `sen_result` - character vector of "positive" / "negative" /
#                         "neutral" labels, parallel to `sen_score`.

# Scores within this distance of zero (inclusive) are treated as neutral
neutral_threshold <- 0.05

# sentiment() returns one row per sentence, so each tweet is scored on its own
# and its sentence scores are averaged into a single value. vapply() builds the
# result in one pass (no growing a vector with c() inside a loop) and
# guarantees exactly one numeric score per tweet.
sen_score <- vapply(
  tweets,
  function(tweet) {
    # NOTE(review): n.before, n.after and amplifier.weight are all set to 0,
    # presumably to switch off the valence-shifter context window and
    # amplifier weighting - confirm against the sentimentr documentation.
    sen <- sentiment(tweet, n.before = 0, n.after = 0, amplifier.weight = 0)
    mean(sen$sentiment)
  },
  numeric(1),
  USE.NAMES = FALSE
)

# Label every score in one vectorised step; the strict inequalities keep
# scores of exactly +/- neutral_threshold in the neutral band.
sen_result <- rep("neutral", length(sen_score))
sen_result[which(sen_score < -neutral_threshold)] <- "negative"
sen_result[which(sen_score > neutral_threshold)] <- "positive"
# A score that could not be computed gets a missing label instead of being
# silently reported as neutral (a scalar `if` would stop with an error here).
sen_result[is.na(sen_score)] <- NA_character_
# Combine the tweets with their scores and labels into one data frame,
# one row per tweet, then show it.
sen_analysis <- data.frame(
  tweets = tweets,
  sen_score = sen_score,
  sen_result = sen_result
)
sen_analysis
As can be seen in the dataframe, sentimentr did a reasonable job of determining whether each tweet was positive or negative. However, the second tweet has been classified as neutral. This appears to be due to the missing space between “perfect” and “monday”, which prevents the algorithm from recognising the word “perfect”. A test run with the correct spacing between these words resulted in the correct classification of positive.
LS0tCnRpdGxlOiAiU2VudGltZW50IEFuYWx5c2lzIEV4YW1wbGUiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyfQojIEluc3RhbGwgc2VudGltZW50ciBkaXJlY3RseSBmcm9tIGdpdGh1YiB1c2luZyBkZXZ0b29scywgYXMgbm8gbG9uZ2VyIGhvc3RlZCBvbiBDUkFOCiNsaWJyYXJ5KGRldnRvb2xzKQojaW5zdGFsbF9naXRodWIoJ3NlbnRpbWVudHInLCAndHJpbmtlcicpCmxpYnJhcnkoc2VudGltZW50cikKbGlicmFyeShwbHlyKQpsaWJyYXJ5KGRwbHlyKQpgYGAKCgpgYGB7cn0KIyBDcmVhdGUgZXhhbXBsZSBkYXRhIHRvIGNvbmR1Y3Qgc2VudGltZW50IGFuYWx5c2lzLCBiYXNlZCBvbiB0d2VldHMKIyBFeGFtcGxlIHR3ZWV0cywgb2J0YWluZWQgZnJvbSBodHRwczovL2dpdGh1Yi5jb20vVGhhcmluZHVNdW5hc2luZ2UvVHdpdHRlci1TZW50aW1lbnQtQW5hbHlzaXMvYmxvYi9tYXN0ZXIvRGF0YVNldC9GaW5hbGl6ZWRUZXN0LmNzdiAKdHdlZXQxIDwtIGMoIm5vdyBJJ20gaGFwcHkiKSAKdHdlZXQyIDwtIGMoIk5pZ2h0IGF0IHRoZSBNdXNldW0sIFdvbHZlcmluZSBhbmQganVuayBmb29kIC0gcGVyZmVjdG1vbmRheSEiKQp0d2VldDMgPC0gYygiSSBsb3ZlIG15IEtpbmRsZTIuIE5vIG1vcmUgc3RhY2tzIG9mIGJvb2tzIHRvIHRyaXAgb3ZlciBvbiB0aGUgd2F5IHRvIHRoZSBsb28iKQp0d2VldDQgPC0gYygiSSBoYXRlIHJldmlzaW9uLCBpdCdzIHNvIGJvcmluZyEgSSBhbSB0b3RhbGx5IHVucHJlcGFyZWQgZm9yIG15IGV4YW0gdG9tb3Jyb3cgOiggVGhpbmdzIGFyZSBub3QgbG9va2luZyBnb29kLi4uIikKdHdlZXQ1IDwtIGMoIllvdSdyZSBhd2Vzb21lIGFuZCBJIGxvdmUgeW91IikKCiMgQ3JlYXRlIHZlY3RvciBvZiBhbGwgdHdlZXRzCnR3ZWV0cyA8LSBjKHR3ZWV0MSwgdHdlZXQyLCB0d2VldDMsIHR3ZWV0NCwgdHdlZXQ1KQpgYGAKCgpgYGB7cn0KIyBDYWxjdWxhdGUgdGhlIG1lYW4gc2VudGltZW50IHNjb3JlIG9mIGVhY2ggdHdlZXQgYW5kIGl0cyByZXN1bHQgKHBvc2l0aXZlLCBuZWdhdGl2ZSBvciBuZXV0cmFsKQpzZW5fc2NvcmUgPC0gYygpCnNlbl9yZXN1bHQgPC0gYygpCgpmb3IgKGkgaW4gdHdlZXRzKXsKICAjIENhbGN1bGF0ZSB0aGUgc2VudGltZW50CiAgc2VuIDwtIHNlbnRpbWVudChpLG4uYmVmb3JlPTAsIG4uYWZ0ZXI9MCwgYW1wbGlmaWVyLndlaWdodD0wKQogIAogICMgQ2FsY3VsYXRlIHRoZSBtZWFuIHNlbnRpbWVudAogIG1lYW5fc2VuIDwtIG1lYW4oc2VuJHNlbnRpbWVudCkKICAKICAjIERldGVybWluZSBpZiBzZW50aW1lbnQgaXMgcG9zaXRpdmUgb3IgbmVnYXRpdmUKICAjIE5lYXV0cmFsIHNldCBhcyB3aXRoaW4gMC4wNSBvZiB6ZXJvCiAgaWYgKG1lYW5fc2VuIDwgLTAuMDUpewogICAgc2VuX3Jlc3VsdCA8LSBjKHNlbl9yZXN1bHQsICJuZWdhdGl2ZSIpCiAgfSBlbHNlIGlmIChtZWFuX3NlbiA+IDAuMDUpIHsKICAgIHNlbl9yZXN1bHQgPC0gYyhzZW5fcmVzdWx0LCAicG9z
aXRpdmUiKQogIH0gZWxzZSB7CiAgICBzZW5fcmVzdWx0IDwtIGMoc2VuX3Jlc3VsdCwgIm5ldXRyYWwiKQogIH0KICAKICAjIEFkZCBzZW4gc2NvcmUgdG8gc2VuX3Njb3JlCiAgc2VuX3Njb3JlIDwtIGMoc2VuX3Njb3JlLCBtZWFuX3NlbikKfQpgYGAKCgpgYGB7cn0KIyBEaXNwbGF5IHJlc3VsdHMgaW4gYSBkYXRhZnJhbWUKc2VuX2FuYWx5c2lzIDwtIGRhdGEuZnJhbWUodHdlZXRzLCBzZW5fc2NvcmUsIHNlbl9yZXN1bHQpCnNlbl9hbmFseXNpcwpgYGAKQXMgY2FuIGJlIHNlZW4gaW4gdGhlIGRhdGFmcmFtZSwgc2VudGltZW50ciBkaWQgYSByZWFzb25hYmxlIGpvYiBvZiBjYWxjdWxhdGluZyBpZiBhIHR3ZWV0IHdhcyBlaXRoZXIgcG9zaXRpdmUgb3IgbmVnYXRpdmUuIEhvd2V2ZXIsIHRoZSBpbnRlcnByZXRhYXRpb24gb2YgdGhlIHNlY29uZCB0d2VldCBoYXMgYmVlbiBjbGFzc2lmaWVkIGFzIG5ldXRyYWwuIFRoaXMgYXBwZWFycyB0byBiZSBkdWUgdG8gdGhlIG1pc3Npbmcgc3BhY2UgaW4gYmV0d2VlbiAgInBlcmZlY3QiIGFuZCAibW9uZGF5IiwgdGhlcmVmb3JlIGNvbmZ1c2luZyB0aGUgYWxnb3JpdGhtLiBBIHRlc3QgcnVuIHdpdGggdGhlIGNvcnJlY3Qgc3BhY2luZyBiZXR3ZWVuIHRoZXNlIHdvcmRzIHJlc3VsdGVkIGluIHRoZSBjb3JyZWN0IGNsYXNzaWZpY2F0aW9uIG9mIHBvc2l0aXZlLiAKCgo=