Sentiment Analysis

# Load libraries
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(tidytext)
library(readtext)
# Read all transcript files into a single data frame; the later steps use its
# `doc_id` and `text` columns.
# Edit the path below so it points at the folder holding the 72 text documents on your machine.
transcripts <- readtext(
  file = "~/Documents/School/ACG 6176- Evaluation of Financial Reports, Business Analysis & Valuation/Week 7/Transcripts"
)
# Open the loaded data in the viewer for a quick visual check.
View(transcripts)
# Tokenise each transcript: the `text` column is split into tokens that are
# stored in a new `word` column, giving the tidy one-token-per-row layout.
tidy_transcripts <- transcripts %>%
  group_by(doc_id) %>%
  unnest_tokens(output = word, input = text) %>%
  ungroup()
# Count how many times each word is used in each document, and attach the
# document's total word count (the sum of its per-word counts) to every row.
# The grouping is dropped at the end so the result is a plain, ungrouped tibble
# and later verbs do not silently operate per document.
wordcount_per_document <- tidy_transcripts %>%
  group_by(doc_id) %>%
  count(word, sort = TRUE) %>%
  mutate(totalwordcount = sum(n)) %>%
  ungroup()
# Keep only the document id and its total word count ...
wordcount_per_document2 <- wordcount_per_document %>%
  select(doc_id, totalwordcount)
# ... and collapse the repeated rows to one row per document.
wordcount_per_document3 <- wordcount_per_document2 %>%
  distinct()
# Implement sentiment analysis with the Loughran and McDonald positive and
# negative (financial) word list.
# The lexicon is restricted to the two categories of interest *before* the
# join; this yields the same rows as filtering afterwards, but lexicon entries
# from other categories never get matched against the transcripts.
# `by = "word"` makes the join key explicit instead of relying on dplyr's
# guess (and its "Joining, by = ..." message).
tidy_sentiment <- tidy_transcripts %>%
  inner_join(
    get_sentiments("loughran") %>%
      filter(sentiment %in% c("positive", "negative")),
    by = "word"
  )
Joining, by = "word"
# Find how many positive/negative words each document has
sentiment_counts <- tidy_sentiment %>%
  count(doc_id, sentiment)

# Attach each document's total word count to its sentiment counts.
sentiment_counts2 <- merge(wordcount_per_document3, sentiment_counts, by = "doc_id")

# Split into one table per sentiment, copying the count into a descriptive column.
positive <- sentiment_counts2 %>%
  filter(sentiment == "positive") %>%
  mutate(positive_word_count = n)
negative <- sentiment_counts2 %>%
  filter(sentiment == "negative") %>%
  mutate(negative_word_count = n)

# Start from the full list of documents and left-merge the positive and
# negative tables onto it. An inner merge of `positive` with `negative` would
# silently drop every document that has no positive words or no negative
# words; here such documents are kept and their missing count is set to 0.
# Output columns are the same as with the inner merge (sentiment.x, n.x,
# positive_word_count, sentiment.y, n.y, negative_word_count); for a document
# without matches the sentiment.* / n.* helper columns are NA.
document_keys <- c("doc_id", "totalwordcount")
sentiment_counts3 <- wordcount_per_document3 %>%
  merge(positive, by = document_keys, all.x = TRUE) %>%
  merge(negative, by = document_keys, all.x = TRUE) %>%
  mutate(
    positive_word_count = coalesce(positive_word_count, 0L),
    negative_word_count = coalesce(negative_word_count, 0L)
  )

# Tone = net positive words as a percentage of all words in the document.
tone <- sentiment_counts3 %>%
  mutate(tone = ((positive_word_count - negative_word_count) / totalwordcount) * 100)
View(tone)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

LS0tCnRpdGxlOiAiV2VlayA3IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCiMgU2VudGltZW50IEFuYWx5c2lzCmBgYHtyfQojIExvYWQgbGlicmFyaWVzCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodGlkeXRleHQpCmxpYnJhcnkocmVhZHRleHQpCmBgYAoKYGBge3J9CiMgY3JlYXRlcyBkYXRhZnJhbWUgd2l0aCBhbGwgb2YgdGhlIHRleHQgZG9jdW1lbnRzCiMgY2hhbmdlIGRpcmVjdG9yeSBzbyB0aGF0IGl0IHJlZmVycyB0byB0aGUgbG9jYXRpb24gb2YgdGhlIDcyIHRleHQgZG9jdW1lbnRzIG9uIHlvdXIgY29tcHV0ZXIKdHJhbnNjcmlwdHM8LXJlYWR0ZXh0KCJ+L0RvY3VtZW50cy9TY2hvb2wvQUNHIDYxNzYtIEV2YWx1YXRpb24gb2YgRmluYW5jaWFsIFJlcG9ydHMsIEJ1c2luZXNzIEFuYWx5c2lzICYgVmFsdWF0aW9uL1dlZWsgNy9UcmFuc2NyaXB0cyIpCgpWaWV3KHRyYW5zY3JpcHRzKQpgYGAKCmBgYHtyfQojIFRyYW5zZm9ybSB0aGUgdGV4dCBkYXRhIHRvIHRpZHkgdGV4dCBkYXRhCnRpZHlfdHJhbnNjcmlwdHMgPC0gdHJhbnNjcmlwdHMgJT4lCiAgZ3JvdXBfYnkoZG9jX2lkKSAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIHRleHQpICU+JQogIHVuZ3JvdXAoKQpgYGAKCmBgYHtyfQojIFVzZSBjb3VudCB0byBmaW5kIG91dCBob3cgbWFueSB0aW1lcyBlYWNoIHdvcmQgaXMgdXNlZAp3b3JkY291bnRfcGVyX2RvY3VtZW50PC10aWR5X3RyYW5zY3JpcHRzICU+JWdyb3VwX2J5KGRvY19pZCklPiUgY291bnQod29yZCwgc29ydCA9IFRSVUUpJT4lbXV0YXRlKHRvdGFsd29yZGNvdW50PXN1bShuKSkKd29yZGNvdW50X3Blcl9kb2N1bWVudDI8LXdvcmRjb3VudF9wZXJfZG9jdW1lbnRbLGMoImRvY19pZCIsICJ0b3RhbHdvcmRjb3VudCIpXQp3b3JkY291bnRfcGVyX2RvY3VtZW50Mzwtd29yZGNvdW50X3Blcl9kb2N1bWVudDIgJT4lIGRpc3RpbmN0CmBgYAoKYGBge3J9CiMgSW1wbGVtZW50IHNlbnRpbWVudCBhbmFseXNpcyB3aXRoIHRoZSBMb3VnaHJhbiBhbmQgTWNEb25hbGQgcG9zaXRpdmUgYW5kIG5lZ2F0aXZlIChmaW5hbmNpYWwpIHdvcmQgbGlzdAp0aWR5X3NlbnRpbWVudCA8LSB0aWR5X3RyYW5zY3JpcHRzICU+JQogIGlubmVyX2pvaW4oZ2V0X3NlbnRpbWVudHMoImxvdWdocmFuIikpICU+JSBmaWx0ZXIoc2VudGltZW50ID09ICJwb3NpdGl2ZSIgfCBzZW50aW1lbnQ9PSJuZWdhdGl2ZSIpCmBgYAoKYGBge3J9CiMgRmluZCBob3cgbWFueSBwb3NpdGl2ZS9uZWdhdGl2ZSB3b3JkcyBlYWNoIGRvY3VtZW50IGhhcwpzZW50aW1lbnRfY291bnRzIDwtIHRpZHlfc2VudGltZW50ICU+JQogIGNvdW50KGRvY19pZCwgc2VudGltZW50KQoKc2VudGltZW50X2NvdW50czIgPC0gbWVyZ2Uod29yZGNvdW50X3Blcl9kb2N1bWVudDMsc2VudGltZW50X2NvdW50cyxieT0iZG9jX2lkIikKCnBvc2l0aXZlIDwtIHNlbnRpbWVudF9jb3VudHMyICU+JSBmaWx0ZXIoc2VudGltZW50ID09ICJwb3NpdGl2ZSIpICU+JSBtdXRhdGUo
cG9zaXRpdmVfd29yZF9jb3VudD1uKQpuZWdhdGl2ZSA8LSBzZW50aW1lbnRfY291bnRzMiAlPiUgZmlsdGVyKHNlbnRpbWVudCA9PSAibmVnYXRpdmUiKSAlPiUgbXV0YXRlKG5lZ2F0aXZlX3dvcmRfY291bnQ9bikKCnNlbnRpbWVudF9jb3VudHMzIDwtIG1lcmdlKHBvc2l0aXZlLG5lZ2F0aXZlLGJ5PWMoImRvY19pZCIsICJ0b3RhbHdvcmRjb3VudCIpKQpgYGAKCmBgYHtyfQp0b25lIDwtIHNlbnRpbWVudF9jb3VudHMzICU+JSBtdXRhdGUodG9uZT0oKHBvc2l0aXZlX3dvcmRfY291bnQtbmVnYXRpdmVfd29yZF9jb3VudCkvdG90YWx3b3JkY291bnQpKjEwMCkKVmlldyh0b25lKQpgYGAKCkFkZCBhIG5ldyBjaHVuayBieSBjbGlja2luZyB0aGUgKkluc2VydCBDaHVuayogYnV0dG9uIG9uIHRoZSB0b29sYmFyIG9yIGJ5IHByZXNzaW5nICpDbWQrT3B0aW9uK0kqLgoKV2hlbiB5b3Ugc2F2ZSB0aGUgbm90ZWJvb2ssIGFuIEhUTUwgZmlsZSBjb250YWluaW5nIHRoZSBjb2RlIGFuZCBvdXRwdXQgd2lsbCBiZSBzYXZlZCBhbG9uZ3NpZGUgaXQgKGNsaWNrIHRoZSAqUHJldmlldyogYnV0dG9uIG9yIHByZXNzICpDbWQrU2hpZnQrSyogdG8gcHJldmlldyB0aGUgSFRNTCBmaWxlKS4gCgpUaGUgcHJldmlldyBzaG93cyB5b3UgYSByZW5kZXJlZCBIVE1MIGNvcHkgb2YgdGhlIGNvbnRlbnRzIG9mIHRoZSBlZGl0b3IuIENvbnNlcXVlbnRseSwgdW5saWtlICpLbml0KiwgKlByZXZpZXcqIGRvZXMgbm90IHJ1biBhbnkgUiBjb2RlIGNodW5rcy4gSW5zdGVhZCwgdGhlIG91dHB1dCBvZiB0aGUgY2h1bmsgd2hlbiBpdCB3YXMgbGFzdCBydW4gaW4gdGhlIGVkaXRvciBpcyBkaXNwbGF5ZWQuCgo=