DATA 607 HW Week 10

Example Code

Citations:

Example code was downloaded from here

Robinson, Julia Silge and David. “2 Sentiment Analysis with Tidy Data: Text Mining with R.” 2 Sentiment Analysis with Tidy Data | Text Mining with R, https://www.tidytextmining.com/sentiment.html.

Data Sets: Saif M. Mohammad and Peter Turney. (2013), ``Crowdsourcing a Word-Emotion Association Lexicon.’’ Computational Intelligence, 29(3): 436-465. nrc

Finn Arup Nielsen: AFINN

Bing Liu and Collaborators: bing

Loughran Lexicon: Marketing Communications: Web // University of Notre Dame. “Resources // Software Repository for Accounting and Finance // University of Notre Dame.” Software Repository for Accounting and Finance, https://sraf.nd.edu/textual-analysis/resources/#Master%20Dictionary.

Comment

Here, the data is loaded and sentiment values are retrieved.

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(tidytext)
library(janeaustenr)
library(dplyr)
library(stringr)
library(tidyr)
library(ggplot2)
library(wordcloud)

## Loading required package: RColorBrewer

library(reshape2)

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

get_sentiments("afinn")

## # A tibble: 2,477 × 2
##    word       value
##    <chr>      <dbl>
##  1 abandon       -2
##  2 abandoned     -2
##  3 abandons      -2
##  4 abducted      -2
##  5 abduction     -2
##  6 abductions    -2
##  7 abhor         -3
##  8 abhorred      -3
##  9 abhorrent     -3
## 10 abhors        -3
## # … with 2,467 more rows

get_sentiments("bing")

## # A tibble: 6,786 × 2
##    word        sentiment
##    <chr>       <chr>    
##  1 2-faces     negative 
##  2 abnormal    negative 
##  3 abolish     negative 
##  4 abominable  negative 
##  5 abominably  negative 
##  6 abominate   negative 
##  7 abomination negative 
##  8 abort       negative 
##  9 aborted     negative 
## 10 aborts      negative 
## # … with 6,776 more rows

get_sentiments("nrc")

## # A tibble: 13,875 × 2
##    word        sentiment
##    <chr>       <chr>    
##  1 abacus      trust    
##  2 abandon     fear     
##  3 abandon     negative 
##  4 abandon     sadness  
##  5 abandoned   anger    
##  6 abandoned   fear     
##  7 abandoned   negative 
##  8 abandoned   sadness  
##  9 abandonment anger    
## 10 abandonment fear     
## # … with 13,865 more rows

Joining the Data

Next, the data was manipulated to isolate Jane Austen’s books and they were further filtered to seek out positive sentiments. Then, the sentiments were plotted and filtered by book.

tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(str_detect(text, 
                                regex("^chapter [\\divxlc]", 
                                      ignore_case = TRUE)))) %>%
  ungroup() %>%
  unnest_tokens(word, text)

nrc_joy <- get_sentiments("nrc") %>% 
  filter(sentiment == "joy")

tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)

## Joining, by = "word"

## # A tibble: 301 × 2
##    word          n
##    <chr>     <int>
##  1 good        359
##  2 friend      166
##  3 hope        143
##  4 happy       125
##  5 love        117
##  6 deal         92
##  7 found        92
##  8 present      89
##  9 kind         82
## 10 happiness    76
## # … with 291 more rows

jane_austen_sentiment <- tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  count(book, index = linenumber %/% 80, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>% 
  mutate(sentiment = positive - negative)

## Joining, by = "word"

ggplot(jane_austen_sentiment, aes(index, sentiment, fill = book)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, ncol = 2, scales = "free_x")

Exploring Pride and Prejudice

Next, focus was placed on Pride and Prejudice. Individual words were selected from the book and sentiments were tabulated on those words. The three data sets, AFINN, bing, and nrc were compared on the same plot. Then, the total positive and negative sentiments are displayed from bing and nrc.

pride_prejudice <- tidy_books %>% 
  filter(book == "Pride & Prejudice")

pride_prejudice

## # A tibble: 122,204 × 4
##    book              linenumber chapter word     
##    <fct>                  <int>   <int> <chr>    
##  1 Pride & Prejudice          1       0 pride    
##  2 Pride & Prejudice          1       0 and      
##  3 Pride & Prejudice          1       0 prejudice
##  4 Pride & Prejudice          3       0 by       
##  5 Pride & Prejudice          3       0 jane     
##  6 Pride & Prejudice          3       0 austen   
##  7 Pride & Prejudice          7       1 chapter  
##  8 Pride & Prejudice          7       1 1        
##  9 Pride & Prejudice         10       1 it       
## 10 Pride & Prejudice         10       1 is       
## # … with 122,194 more rows

afinn <- pride_prejudice %>% 
  inner_join(get_sentiments("afinn")) %>% 
  group_by(index = linenumber %/% 80) %>% 
  summarise(sentiment = sum(value)) %>% 
  mutate(method = "AFINN")

## Joining, by = "word"

bing_and_nrc <- bind_rows(
  pride_prejudice %>% 
    inner_join(get_sentiments("bing")) %>%
    mutate(method = "Bing et al."),
  pride_prejudice %>% 
    inner_join(get_sentiments("nrc") %>% 
                 filter(sentiment %in% c("positive", 
                                         "negative"))
    ) %>%
    mutate(method = "NRC")) %>%
  count(method, index = linenumber %/% 80, sentiment) %>%
  pivot_wider(names_from = sentiment,
              values_from = n,
              values_fill = 0) %>% 
  mutate(sentiment = positive - negative)

## Joining, by = "word"
## Joining, by = "word"

bind_rows(afinn, 
          bing_and_nrc) %>%
  ggplot(aes(index, sentiment, fill = method)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~method, ncol = 1, scales = "free_y")

get_sentiments("nrc") %>% 
  filter(sentiment %in% c("positive", "negative")) %>% 
  count(sentiment)

## # A tibble: 2 × 2
##   sentiment     n
##   <chr>     <int>
## 1 negative   3318
## 2 positive   2308

get_sentiments("bing") %>% 
  count(sentiment)

## # A tibble: 2 × 2
##   sentiment     n
##   <chr>     <int>
## 1 negative   4781
## 2 positive   2005

Contributions to Sentiment

Here, individual words were explored from the bing data set. The words were plotted to visualize the impact each word had on overall sentiment.

bing_word_counts <- tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()

## Joining, by = "word"

bing_word_counts

## # A tibble: 2,585 × 3
##    word     sentiment     n
##    <chr>    <chr>     <int>
##  1 miss     negative   1855
##  2 well     positive   1523
##  3 good     positive   1380
##  4 great    positive    981
##  5 like     positive    725
##  6 better   positive    639
##  7 enough   positive    613
##  8 happy    positive    534
##  9 love     positive    495
## 10 pleasure positive    462
## # … with 2,575 more rows

bing_word_counts %>%
  group_by(sentiment) %>%
  slice_max(n, n = 10) %>% 
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(n, word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(x = "Contribution to sentiment",
       y = NULL)

custom_stop_words <- bind_rows(tibble(word = c("miss"),  
                                      lexicon = c("custom")), 
                               stop_words)

custom_stop_words

## # A tibble: 1,150 × 2
##    word        lexicon
##    <chr>       <chr>  
##  1 miss        custom 
##  2 a           SMART  
##  3 a's         SMART  
##  4 able        SMART  
##  5 about       SMART  
##  6 above       SMART  
##  7 according   SMART  
##  8 accordingly SMART  
##  9 across      SMART  
## 10 actually    SMART  
## # … with 1,140 more rows

Word Clouds

Comments:

I would argue that “miss” is not as negative as it seems. It is very likely that most female characters were referred to as Miss.

tidy_books %>%
  anti_join(stop_words) %>%
  count(word) %>%
  with(wordcloud(word, n, max.words = 100))

## Joining, by = "word"

tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("gray20", "gray80"),
                   max.words = 100)

## Joining, by = "word"

Saddest Chapters

Here, the saddest chapters were selected and displayed in a data frame. The ratio represents the sad words as a ratio to the total words in the chapter.

p_and_p_sentences <- tibble(text = prideprejudice) %>% 
  unnest_tokens(sentence, text, token = "sentences")

p_and_p_sentences$sentence[2]

## [1] "by jane austen"

austen_chapters <- austen_books() %>%
  group_by(book) %>%
  unnest_tokens(chapter, text, token = "regex", 
                pattern = "Chapter|CHAPTER [\\dIVXLC]") %>%
  ungroup()

austen_chapters %>% 
  group_by(book) %>% 
  summarise(chapters = n())

## # A tibble: 6 × 2
##   book                chapters
##   <fct>                  <int>
## 1 Sense & Sensibility       51
## 2 Pride & Prejudice         62
## 3 Mansfield Park            49
## 4 Emma                      56
## 5 Northanger Abbey          32
## 6 Persuasion                25

bingnegative <- get_sentiments("bing") %>% 
  filter(sentiment == "negative")

wordcounts <- tidy_books %>%
  group_by(book, chapter) %>%
  summarize(words = n())

## `summarise()` has grouped output by 'book'. You can override using the `.groups` argument.

tidy_books %>%
  semi_join(bingnegative) %>%
  group_by(book, chapter) %>%
  summarize(negativewords = n()) %>%
  left_join(wordcounts, by = c("book", "chapter")) %>%
  mutate(ratio = negativewords/words) %>%
  filter(chapter != 0) %>%
  slice_max(ratio, n = 1) %>% 
  ungroup()

## Joining, by = "word"
## `summarise()` has grouped output by 'book'. You can override using the `.groups` argument.

## # A tibble: 6 × 5
##   book                chapter negativewords words  ratio
##   <fct>                 <int>         <int> <int>  <dbl>
## 1 Sense & Sensibility      43           161  3405 0.0473
## 2 Pride & Prejudice        34           111  2104 0.0528
## 3 Mansfield Park           46           173  3685 0.0469
## 4 Emma                     15           151  3340 0.0452
## 5 Northanger Abbey         21           149  2982 0.0500
## 6 Persuasion                4            62  1807 0.0343

Original Work:

I decided to explore Harry Potter for my corpus of choice. I found an R package online that contains all of the text for the seven main books in the Harry Potter series. The problem I ran into was that I could not load the correct harrypotter package. I used install.packages(harrypotter). This was a mistake. There is another Harry Potter House package that was being confused with the books package, and I could not resolve the issue for quite some time. I had to remove the package and restart R and clear all consoles and values in order to finally be able to call the correct package. The package installation command is copied from the original package on github.

The other sentiment lexicon I found and will be using was found here

Loughran-McDonald Sentiment lexicon

if (packageVersion("devtools") < 1.6) {
  install.packages("devtools")
}
devtools::install_github("bradleyboehmke/harrypotter", force = TRUE)

## Downloading GitHub repo bradleyboehmke/harrypotter@HEAD

##   
   checking for file ‘/private/var/folders/j0/csjb4l51263g59vn3xvc41c00000gn/T/RtmpInknaq/remotes1467634399fa0/bradleyboehmke-harrypotter-51f7146/DESCRIPTION’ ...
  
✓  checking for file ‘/private/var/folders/j0/csjb4l51263g59vn3xvc41c00000gn/T/RtmpInknaq/remotes1467634399fa0/bradleyboehmke-harrypotter-51f7146/DESCRIPTION’
## 
  
─  preparing ‘harrypotter’:
## 
  
   checking DESCRIPTION meta-information ...
  
✓  checking DESCRIPTION meta-information
## 
  
─  checking for LF line-endings in source and make files and shell scripts
## 
  
─  checking for empty or unneeded directories
## 
  
─  building ‘harrypotter_0.1.0.tar.gz’
## 
  
   
##

library(harrypotter)

titles <- c("Philosopher's Stone", "Chamber of Secrets", "Prisoner of Azkaban",
            "Goblet of Fire", "Order of the Phoenix", "Half-Blood Prince",
            "Deathly Hallows")
book <- list(philosophers_stone, chamber_of_secrets, prisoner_of_azkaban,
              goblet_of_fire, order_of_the_phoenix, half_blood_prince,
              deathly_hallows)

#df <- data.frame(harrypotter::philosophers_stone)

potter_tidy <-   list(philosophers_stone, chamber_of_secrets, prisoner_of_azkaban,
              goblet_of_fire, order_of_the_phoenix, half_blood_prince,
              deathly_hallows) %>%
  set_names(titles) %>%
  map_df(as_tibble, .id = "book") %>%
  mutate(book = factor(book, levels = titles)) %>%
  drop_na(value) %>%
  group_by(book) %>%
  mutate(chapter = row_number(book)) %>%
  ungroup() %>%
  unnest_tokens(word, value)  


potter_sentiment <- potter_tidy %>%
  inner_join(get_sentiments("loughran")) %>%
  count(book, index = chapter, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>% 
  mutate(sentiment = positive - negative)

## Joining, by = "word"

ggplot(potter_sentiment, aes(index, sentiment, fill = book)) +
  geom_col(show.legend = FALSE) +
  coord_cartesian(ylim = c(-150, 50))+
  facet_wrap(~book, ncol = 2, scales = "free_x")+
  ggtitle("Harry Potter Sentiments: Loughran Lexicon")+
  theme(plot.title = element_text(hjust = 0.5))

potter_sentiment <- potter_tidy %>%
  inner_join(get_sentiments("bing")) %>%
  count(book, index = chapter, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>% 
  mutate(sentiment = positive - negative)

## Joining, by = "word"

ggplot(potter_sentiment, aes(index, sentiment, fill = book)) +
  geom_col(show.legend = FALSE) +
  coord_cartesian(ylim = c(-200, 100))+
  facet_wrap(~book, ncol = 2, scales = "free_x")+
  ggtitle("Harry Potter Sentiments: Bing Lexicon")+
  theme(plot.title = element_text(hjust = 0.5))

potter_sentiment <- potter_tidy %>%
  inner_join(get_sentiments("nrc")) %>%
  count(book, index = chapter, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>% 
  mutate(sentiment = positive - negative)

## Joining, by = "word"

ggplot(potter_sentiment, aes(index, sentiment, fill = book)) +
  geom_col(show.legend = FALSE) +
  coord_cartesian(ylim = c(-300, 100))+
  facet_wrap(~book, ncol = 2, scales = "free_x")+
  ggtitle("Harry Potter Sentiments: NRC Lexicon")+
  theme(plot.title = element_text(hjust = 0.5))

Comments

Order of the Phoenix and Deathly Hallows are the two most negative books across the seven. The book with the most variation appears to be Half-Blood Prince, which is most clearly evidenced in the visualization using the Bing Lexicon.

Sentiments

From the three different sentiment lexicons, the Harry Potter books on the whole appear to offer overwhelmingly negative sentiments.

After the third book, the books are divided into more chapters and they seem to get progressively more negative. The widest range seems to be in Order of the Phoenix, so I will analyze that book.

order_phx <- potter_tidy %>% 
  filter(book == "Order of the Phoenix")

order_phx

## # A tibble: 258,763 × 3
##    book                 chapter word    
##    <fct>                  <int> <chr>   
##  1 Order of the Phoenix       1 dudley  
##  2 Order of the Phoenix       1 demented
##  3 Order of the Phoenix       1 the     
##  4 Order of the Phoenix       1 hottest 
##  5 Order of the Phoenix       1 day     
##  6 Order of the Phoenix       1 of      
##  7 Order of the Phoenix       1 the     
##  8 Order of the Phoenix       1 summer  
##  9 Order of the Phoenix       1 so      
## 10 Order of the Phoenix       1 far     
## # … with 258,753 more rows

afinn <- order_phx %>% 
  inner_join(get_sentiments("afinn")) %>% 
  group_by(index = chapter) %>% 
  summarise(sentiment = sum(value)) %>% 
  mutate(method = "AFINN")

## Joining, by = "word"

bing_and_nrc <- bind_rows(
  order_phx %>% 
    inner_join(get_sentiments("bing")) %>%
    mutate(method = "Bing et al."),
  order_phx %>% 
    inner_join(get_sentiments("nrc") %>% 
                 filter(sentiment %in% c("positive", 
                                         "negative"))
    ) %>%
    mutate(method = "NRC")) %>%
  count(method, index = chapter, sentiment) %>%
  pivot_wider(names_from = sentiment,
              values_from = n,
              values_fill = 0) %>% 
  mutate(sentiment = positive - negative)

## Joining, by = "word"
## Joining, by = "word"

loughran <- order_phx %>% 
    inner_join(get_sentiments("loughran")) %>%
    mutate(method = "Loughran") %>%
    group_by(index = chapter) %>%
    filter(sentiment %in% c("positive", "negative")) %>%
    mutate(method = "Loughran") %>%
  count(method, index = chapter, sentiment) %>%
  pivot_wider(names_from = sentiment,
              values_from = n,
              values_fill = 0) %>% 
  mutate(sentiment = positive - negative)

## Joining, by = "word"

bind_rows(afinn, 
          bing_and_nrc, loughran) %>%
  ggplot(aes(index, sentiment, fill = method)) +
  geom_col(show.legend = FALSE) +
  coord_cartesian(ylim = c(-300, 125))+
  facet_wrap(~method, ncol = 1, scales = "free_y")

Further Analysis

NRC seems the most polarized, so I will use NRC to find the ratio of negative words to total words. One peculiar result is that the Loughran Lexicon did not return any positive sentiments.

get_sentiments("nrc") %>% 
  filter(sentiment %in% c("positive", "negative")) %>% 
  count(sentiment)

## # A tibble: 2 × 2
##   sentiment     n
##   <chr>     <int>
## 1 negative   3318
## 2 positive   2308

get_sentiments("bing") %>% 
  count(sentiment)

## # A tibble: 2 × 2
##   sentiment     n
##   <chr>     <int>
## 1 negative   4781
## 2 positive   2005

get_sentiments("loughran") %>% 
  filter(sentiment %in% c("positive", "negative")) %>% 
  count(sentiment)

## # A tibble: 2 × 2
##   sentiment     n
##   <chr>     <int>
## 1 negative   2355
## 2 positive    354

stone_chapters <- as.numeric(nrow(data.frame(harrypotter::philosophers_stone)))
chamber_chapters <- as.numeric(nrow(data.frame(harrypotter::chamber_of_secrets)))
prisoner_chapters <- as.numeric(nrow(data.frame(harrypotter::prisoner_of_azkaban)))
goblet_chapters <- as.numeric(nrow(data.frame(harrypotter::goblet_of_fire)))
order_chapters <- as.numeric(nrow(data.frame(harrypotter::order_of_the_phoenix)))
prince_chapters <- as.numeric(nrow(data.frame(harrypotter::half_blood_prince)))
deathly_chapters <- as.numeric(nrow(data.frame(harrypotter::deathly_hallows)))

chapters <- data.frame(c(stone_chapters, chamber_chapters, prisoner_chapters, goblet_chapters, order_chapters, prince_chapters, deathly_chapters))
chapters <- cbind(titles,chapters)
colnames(chapters) <- c("title", "count")


nrcnegative <- get_sentiments("nrc") %>% 
  filter(sentiment == "negative")

wordcounts <- potter_tidy %>%
  group_by(book, chapter) %>%
  summarize(words = n())

## `summarise()` has grouped output by 'book'. You can override using the `.groups` argument.

potter_tidy %>%
  semi_join(nrcnegative) %>%
  group_by(book, chapter) %>%
  summarize(negativewords = n()) %>%
  left_join(wordcounts, by = c("book", "chapter")) %>%
  mutate(ratio = negativewords/words) %>%
  filter(chapter != 0) %>%
  slice_max(ratio, n = 1) %>% 
  ungroup()

## Joining, by = "word"
## `summarise()` has grouped output by 'book'. You can override using the `.groups` argument.

## # A tibble: 7 × 5
##   book                 chapter negativewords words  ratio
##   <fct>                  <int>         <int> <int>  <dbl>
## 1 Philosopher's Stone       17           318  5488 0.0579
## 2 Chamber of Secrets         2           199  2894 0.0688
## 3 Prisoner of Azkaban        3           284  4415 0.0643
## 4 Goblet of Fire            32           159  2010 0.0791
## 5 Order of the Phoenix      36           268  3845 0.0697
## 6 Half-Blood Prince         28           274  3597 0.0762
## 7 Deathly Hallows           32           375  5552 0.0675

Conclusions and Alternative Plots

The most variation is found in the Order of the Phoenix and Deathly Hallows books. The Bing lexicon seems to be the most effective at getting a wider range of outcomes, which is more in line with expectation in a book. Having all of the lexicons available makes for a more accurate idea of the reality of the data. It appears that the Harry Potter books are largely negative, but I would like to explore which emotion is the main cause of the negativity. My guess is that anger would contribute more to the overall sentiment than sadness would.

Sadness contributed on average just slightly more than anger.

ggplot(potter_sentiment, aes(index, sentiment, fill = book, color = book)) +
  geom_line(show.legend = FALSE) +
  coord_cartesian(ylim = c(-300, 100))+
  facet_wrap(~book, ncol = 2, scales = "free_x")+
  ggtitle("Harry Potter Sentiments: NRC Lexicon")+
  theme(plot.title = element_text(hjust = 0.5))

ggplot(potter_sentiment, aes(index, sentiment, fill = book, color = book)) +
  geom_line(show.legend = FALSE) +
  coord_cartesian(ylim = c(-300, 100))+
  facet_wrap(~book, ncol = 2, scales = "free_x")+
  ggtitle("Harry Potter Sentiments: NRC Lexicon")+
  theme(plot.title = element_text(hjust = 0.5))+
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(potter_sentiment, aes(index, sadness, color = book)) +
  geom_line()+
  facet_wrap(~book, ncol = 2, scales = "free_x")+
  ggtitle("Harry Potter Sentiments: Sadness")+
  theme(plot.title = element_text(hjust = 0.5))

ggplot(potter_sentiment, aes(index, anger, color = book)) +
  geom_line()+
  facet_wrap(~book, ncol = 2, scales = "free_x")+
  ggtitle("Harry Potter Sentiments: Anger")+
  theme(plot.title = element_text(hjust = 0.5))

ggplot(potter_sentiment, aes(index, fear, color = book)) +
  geom_line()+
  facet_wrap(~book, ncol = 2, scales = "free_x")+
  ggtitle("Harry Potter Sentiments: Fear")+
  theme(plot.title = element_text(hjust = 0.5))

sentiments_avg <- colMeans(potter_sentiment[3:13])
category <- colnames(potter_sentiment[3:13])
avg <- data.frame()
avg <- data.frame(cbind(category,as.numeric(sentiments_avg)))
colnames(avg) <- c("category", "avg")
rownames(avg) <- c(1:11)
avg$category <- as.factor(avg$category)
avg$avg <- as.numeric(avg$avg)


ggplot(avg, aes(x = category, y = avg, fill = category)) +
  geom_col(stat = "identity")+
  ggtitle("Harry Potter Sentiment Contributions")+
  theme(plot.title = element_text(hjust = 0.5))+
  scale_x_discrete(guide = guide_axis(n.dodge = 3))

## Warning: Ignoring unknown parameters: stat