Import data

library(readtext)
library(dplyr)

# Load every file in the Hunter poems folder. readtext() returns one row per
# file with a `doc_id` column and a `text` column; the result is wrapped in a
# tibble so it prints compactly.
poems_hunter <- tibble(
  readtext("C:/Users/sclee1/OneDrive/Documents/R/Madness/poems_hunter/*")
)

poems_hunter
## # A tibble: 10 x 2
##    doc_id           text                                                        
##    <chr>            <chr>                                                       
##  1 1_Hunter_2009.t~ "It's A Mad(Off) , Mad(Off) , Mad(Off) , Mad(Off) World\nWh~
##  2 10_Hunter_2008.~ "Mad Mad World\n\nthe world is mad\ninsane!\nchristians fig~
##  3 2_Hunter_2014.t~ "What Is This Mad Race Of The Mad-Mad Modern Man?\n\nWhat i~
##  4 3_Hunter_2009.t~ "It Is A Mad, Mad, Mad World.........\n\nMoon is gone\nbut ~
##  5 4_Hunter_2014.t~ "Poetry, Poetry, Poetry, Will Madden Me And You/ You Poetry~
##  6 5_Hunter_2014.t~ "The Poets Are The Mad Men And Poetry A Mad Man's Babbling\~
##  7 6_Hunter_2013.t~ "A Mad Mad Mothers Mistake\n\nLeft left not right she's lef~
##  8 7_Hunter_2010.t~ "Thoughts In Madness, Madness In Thought\n\nMy days are mad~
##  9 8_Hunter_2012.t~ "Traces Of Madness (Madman's Song)\n\nYou would have said, ~
## 10 9_Hunter_2010.t~ "If Mad Is A Hatter Then Mad Am I\n\nIf mad is a hatter the~
# Load every file in the Victorian poems folder, one row per file, as a tibble
# with `doc_id` and `text` columns.
# NOTE(review): these texts contain non-ASCII punctuation (curly quotes, em
# dashes); if it comes out garbled, check the files' encoding and pass it via
# readtext()'s `encoding` argument.
poems_victorian <- tibble(
  readtext("C:/Users/sclee1/OneDrive/Documents/R/Madness/poems_victorian/*")
)

poems_victorian
## # A tibble: 10 x 2
##    doc_id             text                                                      
##    <chr>              <chr>                                                     
##  1 1_Victorian_1850.~ "THE BALLAD OF RICHARD BURNELL.\n\nFrom his bed rose Rich~
##  2 10_Victorian_1820~ "The following touching Verses are taken from a Newcastle~
##  3 2_Victorian_1820.~ "THE BRANCHERS.*\n\n1.\nI sat to bask, one sunny morn,\n1~
##  4 3_Victorian_1890.~ "THE BALLAD OF THE KING’S JEST.\n\nWhen springtime flus~
##  5 4_Victorian_1850.~ "THE PENITENT FREE-TRADER.\n\nTufnell ! For the love of~
##  6 5_Victorian_1820.~ "STANZAS.\n\n“ —— And muttered, lost ! lost ! lost ~
##  7 6_Victorian_1860.~ "XV.—THE MOTHER’S LAMENT.\n\nWhen I was young, when I~
##  8 7_Victorian_1880.~ "A Stray Sunbeam.\n\nA\nSUNBEAM gone astray\n1\nUpon life~
##  9 8_Victorian_1870.~ "LADY NOEL BYRON.\n\nA\nND as she spoke, it seemed as tho~
## 10 9_Victorian_1840.~ "The Auld State Kirk.\nNEW SONG.\nTune—“ Auld Lang Sy~

Join datasets

# Stack the two corpora into one table; both have the same doc_id/text columns,
# so the Hunter rows are simply followed by the Victorian rows.
poems_raw <- bind_rows(poems_hunter, poems_victorian)
poems_raw
## # A tibble: 20 x 2
##    doc_id             text                                                      
##    <chr>              <chr>                                                     
##  1 1_Hunter_2009.txt  "It's A Mad(Off) , Mad(Off) , Mad(Off) , Mad(Off) World\n~
##  2 10_Hunter_2008.txt "Mad Mad World\n\nthe world is mad\ninsane!\nchristians f~
##  3 2_Hunter_2014.txt  "What Is This Mad Race Of The Mad-Mad Modern Man?\n\nWhat~
##  4 3_Hunter_2009.txt  "It Is A Mad, Mad, Mad World.........\n\nMoon is gone\nbu~
##  5 4_Hunter_2014.txt  "Poetry, Poetry, Poetry, Will Madden Me And You/ You Poet~
##  6 5_Hunter_2014.txt  "The Poets Are The Mad Men And Poetry A Mad Man's Babblin~
##  7 6_Hunter_2013.txt  "A Mad Mad Mothers Mistake\n\nLeft left not right she's l~
##  8 7_Hunter_2010.txt  "Thoughts In Madness, Madness In Thought\n\nMy days are m~
##  9 8_Hunter_2012.txt  "Traces Of Madness (Madman's Song)\n\nYou would have said~
## 10 9_Hunter_2010.txt  "If Mad Is A Hatter Then Mad Am I\n\nIf mad is a hatter t~
## 11 1_Victorian_1850.~ "THE BALLAD OF RICHARD BURNELL.\n\nFrom his bed rose Rich~
## 12 10_Victorian_1820~ "The following touching Verses are taken from a Newcastle~
## 13 2_Victorian_1820.~ "THE BRANCHERS.*\n\n1.\nI sat to bask, one sunny morn,\n1~
## 14 3_Victorian_1890~ "THE BALLAD OF THE KING’S JEST.\n\nWhen springtime flus~
## 15 4_Victorian_1850~ "THE PENITENT FREE-TRADER.\n\nTufnell ! For the love of~
## 16 5_Victorian_1820~ "STANZAS.\n\n“ —— And muttered, lost ! lost ! lost ~
## 17 6_Victorian_1860~ "XV.—THE MOTHER’S LAMENT.\n\nWhen I was young, when I~
## 18 7_Victorian_1880.~ "A Stray Sunbeam.\n\nA\nSUNBEAM gone astray\n1\nUpon life~
## 19 8_Victorian_1870.~ "LADY NOEL BYRON.\n\nA\nND as she spoke, it seemed as tho~
## 20 9_Victorian_1840~ "The Auld State Kirk.\nNEW SONG.\nTune—“ Auld Lang Sy~

Clean data

library(tidyr)

# Split the file name (e.g. "1_Hunter_2009.txt") into ID, Database and Year.
# separate() splits on any run of non-alphanumeric characters by default, so
# the ".txt" extension becomes a fourth piece. extra = "drop" discards that
# piece on purpose instead of raising an "Additional pieces discarded" warning
# for every row. All three new columns stay character.
poems <- poems_raw %>%
  separate(
    col = doc_id,
    into = c("ID", "Database", "Year"),
    extra = "drop"
  )
poems
## # A tibble: 20 x 4
##    ID    Database  Year  text                                                   
##    <chr> <chr>     <chr> <chr>                                                  
##  1 1     Hunter    2009  "It's A Mad(Off) , Mad(Off) , Mad(Off) , Mad(Off) Worl~
##  2 10    Hunter    2008  "Mad Mad World\n\nthe world is mad\ninsane!\nchristian~
##  3 2     Hunter    2014  "What Is This Mad Race Of The Mad-Mad Modern Man?\n\nW~
##  4 3     Hunter    2009  "It Is A Mad, Mad, Mad World.........\n\nMoon is gone\~
##  5 4     Hunter    2014  "Poetry, Poetry, Poetry, Will Madden Me And You/ You P~
##  6 5     Hunter    2014  "The Poets Are The Mad Men And Poetry A Mad Man's Babb~
##  7 6     Hunter    2013  "A Mad Mad Mothers Mistake\n\nLeft left not right she'~
##  8 7     Hunter    2010  "Thoughts In Madness, Madness In Thought\n\nMy days ar~
##  9 8     Hunter    2012  "Traces Of Madness (Madman's Song)\n\nYou would have s~
## 10 9     Hunter    2010  "If Mad Is A Hatter Then Mad Am I\n\nIf mad is a hatte~
## 11 1     Victorian 1850  "THE BALLAD OF RICHARD BURNELL.\n\nFrom his bed rose R~
## 12 10    Victorian 1820  "The following touching Verses are taken from a Newcas~
## 13 2     Victorian 1820  "THE BRANCHERS.*\n\n1.\nI sat to bask, one sunny morn,~
## 14 3     Victorian 1890  "THE BALLAD OF THE KING’S JEST.\n\nWhen springtime f~
## 15 4     Victorian 1850  "THE PENITENT FREE-TRADER.\n\nTufnell ! For the love~
## 16 5     Victorian 1820  "STANZAS.\n\n“ —— And muttered, lost ! lost ! lo~
## 17 6     Victorian 1860  "XV.—THE MOTHER’S LAMENT.\n\nWhen I was young, whe~
## 18 7     Victorian 1880  "A Stray Sunbeam.\n\nA\nSUNBEAM gone astray\n1\nUpon l~
## 19 8     Victorian 1870  "LADY NOEL BYRON.\n\nA\nND as she spoke, it seemed as ~
## 20 9     Victorian 1840  "The Auld State Kirk.\nNEW SONG.\nTune—“ Auld Lang~

Tokenize text data

library(tidytext)
library(stringr)

# Break each poem into one word per row, then:
#   * drop stop words, joining explicitly on `word` so the key is stated in
#     the code rather than inferred from shared column names;
#   * drop tokens that contain any character other than an ASCII letter or
#     whitespace, and tokens that contain "mad";
#   * relabel the "Hunter" corpus as "Contemporary".
# NOTE(review): the "mad" pattern matches anywhere in a token, so besides
# "madness"/"madman" it also removes unrelated words such as "nomad" --
# confirm this is intended.
poems_cleaned <- poems %>%
  unnest_tokens(output = word, input = text) %>%
  anti_join(stop_words, by = "word") %>%
  filter(!str_detect(word, "[^a-zA-Z\\s]|mad")) %>%
  mutate(Database = str_replace(Database, "Hunter", "Contemporary"))

poems_cleaned
## # A tibble: 3,552 x 4
##    ID    Database     Year  word     
##    <chr> <chr>        <chr> <chr>    
##  1 1     Contemporary 2009  world    
##  2 1     Contemporary 2009  bernard  
##  3 1     Contemporary 2009  investors
##  4 1     Contemporary 2009  banker   
##  5 1     Contemporary 2009  globe    
##  6 1     Contemporary 2009  money    
##  7 1     Contemporary 2009  adolph   
##  8 1     Contemporary 2009  hitler   
##  9 1     Contemporary 2009  blamed   
## 10 1     Contemporary 2009  jewish   
## # ... with 3,542 more rows

Visualize most frequent words

library(ggplot2)

# Tally word counts per corpus and keep each corpus's ten most frequent words
# (top_n() keeps ties, so a panel may show more than ten). The words are
# reordered within each corpus before plotting so every facet is sorted by its
# own frequencies.
poems_cleaned %>%
  count(Database, word, sort = TRUE) %>%
  group_by(Database) %>%
  top_n(10, n) %>%
  ungroup() %>%
  mutate(word = reorder_within(word, n, Database)) %>%
  ggplot(aes(x = n, y = word, fill = Database)) +
  geom_col(alpha = 0.8) +
  scale_y_reordered() +
  facet_wrap(~Database, scales = "free_y") +
  labs(
    title = "Top 10 Most Frequent Words",
    x = "Word Frequency",
    y = NULL
  )

Sentiment Analysis

Using NRC Lexicon

# Fetch the NRC lexicon: one row per (word, sentiment) pair, so a word can
# appear on several rows.
nrc <- get_sentiments(lexicon = "nrc")
nrc
## # A tibble: 13,901 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 abacus      trust    
##  2 abandon     fear     
##  3 abandon     negative 
##  4 abandon     sadness  
##  5 abandoned   anger    
##  6 abandoned   fear     
##  7 abandoned   negative 
##  8 abandoned   sadness  
##  9 abandonment anger    
## 10 abandonment fear     
## # ... with 13,891 more rows
# Attach NRC sentiment labels to every token and count tokens per label within
# each corpus. The join key is named explicitly so it does not depend on which
# column names the two tables happen to share. A word can carry several NRC
# labels, so a single token may contribute to more than one bar.
poems_cleaned %>%
  inner_join(nrc, by = "word") %>%
  count(Database, sentiment, sort = TRUE) %>%
  ggplot(
    aes(y = reorder_within(sentiment, n, Database), x = n, fill = Database)
  ) +
  geom_col(alpha = 0.8) +
  facet_wrap(~Database, scales = "free_y") +
  scale_y_reordered() +
  labs(title = "Number of Words Associated with Emotions",
       y = "Emotions",
       x = "Number of Words")

Using Bing Lexicon

# Fetch the Bing lexicon, which gives each listed word a `sentiment` label.
bing <- get_sentiments(lexicon = "bing")
bing
## # A tibble: 6,786 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 2-faces     negative 
##  2 abnormal    negative 
##  3 abolish     negative 
##  4 abominable  negative 
##  5 abominably  negative 
##  6 abominate   negative 
##  7 abomination negative 
##  8 abort       negative 
##  9 aborted     negative 
## 10 aborts      negative 
## # ... with 6,776 more rows
# Label each token with its Bing sentiment (join key stated explicitly) and
# compare the share of each label per corpus. position = "fill" stretches every
# bar to the same height, so the plot shows proportions rather than counts.
poems_cleaned %>%
  inner_join(bing, by = "word") %>%
  ggplot(aes(x = Database, fill = sentiment)) +
  geom_bar(position = "fill") +
  labs(title = "Ratios of Negative and Positive Words",
       y = "Proportions",
       x = NULL)

Using AFINN Lexicon

# Fetch the AFINN lexicon, which assigns each listed word a numeric `value`.
affin <- get_sentiments(lexicon = "afinn")
affin
## # A tibble: 2,477 x 2
##    word       value
##    <chr>      <dbl>
##  1 abandon       -2
##  2 abandoned     -2
##  3 abandons      -2
##  4 abducted      -2
##  5 abduction     -2
##  6 abductions    -2
##  7 abhor         -3
##  8 abhorred      -3
##  9 abhorrent     -3
## 10 abhors        -3
## # ... with 2,467 more rows
# Score every token with its AFINN value (join key stated explicitly) and add
# the scores up per corpus.
# NOTE(review): this is a raw sum, so it depends on how many scored words each
# corpus contains -- consider a mean if the corpora differ in size.
poems_cleaned %>%
  inner_join(affin, by = "word") %>%
  group_by(Database) %>%
  summarise(sentiment_score = sum(value)) %>%
  ungroup() %>%
  ggplot(aes(x = Database, y = sentiment_score)) +
  geom_col(fill = "midnightblue", alpha = 0.8) +
  labs(title = "Sum of Sentiment Scores of Words",
       x = NULL,
       y = "Sum of Sentiment Scores")