library(readtext)
library(dplyr)
# Read every file in the Hunter poems folder. The result has one row per file:
# `doc_id` holds the file name and `text` holds the full contents of the file.
poems_hunter <- tibble(
  readtext("C:/Users/sclee1/OneDrive/Documents/R/Madness/poems_hunter/*")
)
poems_hunter
## # A tibble: 10 x 2
## doc_id text
## <chr> <chr>
## 1 1_Hunter_2009.t~ "It's A Mad(Off) , Mad(Off) , Mad(Off) , Mad(Off) World\nWh~
## 2 10_Hunter_2008.~ "Mad Mad World\n\nthe world is mad\ninsane!\nchristians fig~
## 3 2_Hunter_2014.t~ "What Is This Mad Race Of The Mad-Mad Modern Man?\n\nWhat i~
## 4 3_Hunter_2009.t~ "It Is A Mad, Mad, Mad World.........\n\nMoon is gone\nbut ~
## 5 4_Hunter_2014.t~ "Poetry, Poetry, Poetry, Will Madden Me And You/ You Poetry~
## 6 5_Hunter_2014.t~ "The Poets Are The Mad Men And Poetry A Mad Man's Babbling\~
## 7 6_Hunter_2013.t~ "A Mad Mad Mothers Mistake\n\nLeft left not right she's lef~
## 8 7_Hunter_2010.t~ "Thoughts In Madness, Madness In Thought\n\nMy days are mad~
## 9 8_Hunter_2012.t~ "Traces Of Madness (Madman's Song)\n\nYou would have said, ~
## 10 9_Hunter_2010.t~ "If Mad Is A Hatter Then Mad Am I\n\nIf mad is a hatter the~
# Read every file in the Victorian poems folder into a tibble with one row per
# file (`doc_id` = file name, `text` = file contents).
#
# The encoding is declared explicitly: without it the printed text shows
# sequences such as "â€" where a curly apostrophe, curly quote or em dash
# belongs, i.e. UTF-8 bytes decoded with the platform's native code page.
poems_victorian <- readtext(
  "C:/Users/sclee1/OneDrive/Documents/R/Madness/poems_victorian/*",
  encoding = "UTF-8"
) %>%
  tibble()
poems_victorian
## # A tibble: 10 x 2
## doc_id text
## <chr> <chr>
## 1 1_Victorian_1850.~ "THE BALLAD OF RICHARD BURNELL.\n\nFrom his bed rose Rich~
## 2 10_Victorian_1820~ "The following touching Verses are taken from a Newcastle~
## 3 2_Victorian_1820.~ "THE BRANCHERS.*\n\n1.\nI sat to bask, one sunny morn,\n1~
## 4 3_Victorian_1890.~ "THE BALLAD OF THE KING’S JEST.\n\nWhen springtime flus~
## 5 4_Victorian_1850.~ "THE PENITENT FREE-TRADER.\n\nTufnell ! For the love of~
## 6 5_Victorian_1820.~ "STANZAS.\n\n“ —— And muttered, lost ! lost ! lost ~
## 7 6_Victorian_1860.~ "XV.—THE MOTHER’S LAMENT.\n\nWhen I was young, when I~
## 8 7_Victorian_1880.~ "A Stray Sunbeam.\n\nA\nSUNBEAM gone astray\n1\nUpon life~
## 9 8_Victorian_1870.~ "LADY NOEL BYRON.\n\nA\nND as she spoke, it seemed as tho~
## 10 9_Victorian_1840.~ "The Auld State Kirk.\nNEW SONG.\nTune—“ Auld Lang Sy~
# Join datasets: stack the Hunter and Victorian poems into a single table
# Stack the two corpora into one table (Hunter rows first, then Victorian).
# bind_rows() is dplyr's row-binding verb: it matches columns by name and
# checks that the column types are compatible before combining them.
poems_raw <- bind_rows(poems_hunter, poems_victorian)
poems_raw
## # A tibble: 20 x 2
## doc_id text
## <chr> <chr>
## 1 1_Hunter_2009.txt "It's A Mad(Off) , Mad(Off) , Mad(Off) , Mad(Off) World\n~
## 2 10_Hunter_2008.txt "Mad Mad World\n\nthe world is mad\ninsane!\nchristians f~
## 3 2_Hunter_2014.txt "What Is This Mad Race Of The Mad-Mad Modern Man?\n\nWhat~
## 4 3_Hunter_2009.txt "It Is A Mad, Mad, Mad World.........\n\nMoon is gone\nbu~
## 5 4_Hunter_2014.txt "Poetry, Poetry, Poetry, Will Madden Me And You/ You Poet~
## 6 5_Hunter_2014.txt "The Poets Are The Mad Men And Poetry A Mad Man's Babblin~
## 7 6_Hunter_2013.txt "A Mad Mad Mothers Mistake\n\nLeft left not right she's l~
## 8 7_Hunter_2010.txt "Thoughts In Madness, Madness In Thought\n\nMy days are m~
## 9 8_Hunter_2012.txt "Traces Of Madness (Madman's Song)\n\nYou would have said~
## 10 9_Hunter_2010.txt "If Mad Is A Hatter Then Mad Am I\n\nIf mad is a hatter t~
## 11 1_Victorian_1850.~ "THE BALLAD OF RICHARD BURNELL.\n\nFrom his bed rose Rich~
## 12 10_Victorian_1820~ "The following touching Verses are taken from a Newcastle~
## 13 2_Victorian_1820.~ "THE BRANCHERS.*\n\n1.\nI sat to bask, one sunny morn,\n1~
## 14 3_Victorian_1890.~ "THE BALLAD OF THE KING’S JEST.\n\nWhen springtime flus~
## 15 4_Victorian_1850.~ "THE PENITENT FREE-TRADER.\n\nTufnell ! For the love of~
## 16 5_Victorian_1820.~ "STANZAS.\n\n“ —— And muttered, lost ! lost ! lost ~
## 17 6_Victorian_1860.~ "XV.—THE MOTHER’S LAMENT.\n\nWhen I was young, when I~
## 18 7_Victorian_1880.~ "A Stray Sunbeam.\n\nA\nSUNBEAM gone astray\n1\nUpon life~
## 19 8_Victorian_1870.~ "LADY NOEL BYRON.\n\nA\nND as she spoke, it seemed as tho~
## 20 9_Victorian_1840.~ "The Auld State Kirk.\nNEW SONG.\nTune—“ Auld Lang Sy~
library(tidyr)
# Split the file name in `doc_id` (e.g. "1_Hunter_2009.txt") into its parts.
# separate()'s default separator splits on any run of non-alphanumeric
# characters, so the "." before the extension produces a fourth piece ("txt").
# Naming that piece NA in `into` drops it explicitly, which avoids the
# "Expected 3 pieces. Additional pieces discarded" warning on every row.
# ID and Year are left as character columns.
poems <- poems_raw %>%
  separate(doc_id, into = c("ID", "Database", "Year", NA))
poems
## # A tibble: 20 x 4
## ID Database Year text
## <chr> <chr> <chr> <chr>
## 1 1 Hunter 2009 "It's A Mad(Off) , Mad(Off) , Mad(Off) , Mad(Off) Worl~
## 2 10 Hunter 2008 "Mad Mad World\n\nthe world is mad\ninsane!\nchristian~
## 3 2 Hunter 2014 "What Is This Mad Race Of The Mad-Mad Modern Man?\n\nW~
## 4 3 Hunter 2009 "It Is A Mad, Mad, Mad World.........\n\nMoon is gone\~
## 5 4 Hunter 2014 "Poetry, Poetry, Poetry, Will Madden Me And You/ You P~
## 6 5 Hunter 2014 "The Poets Are The Mad Men And Poetry A Mad Man's Babb~
## 7 6 Hunter 2013 "A Mad Mad Mothers Mistake\n\nLeft left not right she'~
## 8 7 Hunter 2010 "Thoughts In Madness, Madness In Thought\n\nMy days ar~
## 9 8 Hunter 2012 "Traces Of Madness (Madman's Song)\n\nYou would have s~
## 10 9 Hunter 2010 "If Mad Is A Hatter Then Mad Am I\n\nIf mad is a hatte~
## 11 1 Victorian 1850 "THE BALLAD OF RICHARD BURNELL.\n\nFrom his bed rose R~
## 12 10 Victorian 1820 "The following touching Verses are taken from a Newcas~
## 13 2 Victorian 1820 "THE BRANCHERS.*\n\n1.\nI sat to bask, one sunny morn,~
## 14 3 Victorian 1890 "THE BALLAD OF THE KING’S JEST.\n\nWhen springtime f~
## 15 4 Victorian 1850 "THE PENITENT FREE-TRADER.\n\nTufnell ! For the love~
## 16 5 Victorian 1820 "STANZAS.\n\n“ —— And muttered, lost ! lost ! lo~
## 17 6 Victorian 1860 "XV.—THE MOTHER’S LAMENT.\n\nWhen I was young, whe~
## 18 7 Victorian 1880 "A Stray Sunbeam.\n\nA\nSUNBEAM gone astray\n1\nUpon l~
## 19 8 Victorian 1870 "LADY NOEL BYRON.\n\nA\nND as she spoke, it seemed as ~
## 20 9 Victorian 1840 "The Auld State Kirk.\nNEW SONG.\nTune—“ Auld Lang~
library(tidytext)
library(stringr)
# Build the word-level table used by all later plots: one row per remaining
# word, keeping the ID, Database and Year of the poem it came from.
poems_cleaned <- poems %>%
  # One token (word) per row, taken from the `text` column.
  unnest_tokens(output = word, input = text) %>%
  # Remove stop words. The key is named explicitly so the join does not depend
  # on whichever column names the two tables happen to share, and so it does
  # not emit the "Joining, by = ..." message.
  anti_join(stop_words, by = "word") %>%
  # Drop tokens containing any character other than ASCII letters/whitespace,
  # and tokens containing "mad".
  # NOTE(review): "mad" is a substring match, so it also removes words such as
  # "madam" or "nomad" -- confirm this is intended.
  filter(!str_detect(word, "[^a-zA-Z\\s]|mad")) %>%
  # Relabel the Hunter corpus for display.
  mutate(Database = str_replace(Database, "Hunter", "Contemporary"))
poems_cleaned
## # A tibble: 3,552 x 4
## ID Database Year word
## <chr> <chr> <chr> <chr>
## 1 1 Contemporary 2009 world
## 2 1 Contemporary 2009 bernard
## 3 1 Contemporary 2009 investors
## 4 1 Contemporary 2009 banker
## 5 1 Contemporary 2009 globe
## 6 1 Contemporary 2009 money
## 7 1 Contemporary 2009 adolph
## 8 1 Contemporary 2009 hitler
## 9 1 Contemporary 2009 blamed
## 10 1 Contemporary 2009 jewish
## # ... with 3,542 more rows
# Visualize the most frequent words in each database
library(ggplot2)
# Horizontal bar chart of the most frequent words, one facet per database.
poems_cleaned %>%
  count(Database, word, sort = TRUE) %>%
  group_by(Database) %>%
  # Keep the rows with the 10 largest counts within each database.
  top_n(n = 10, wt = n) %>%
  ungroup() %>%
  # reorder_within() / scale_y_reordered() order the bars separately inside
  # each facet.
  ggplot(aes(n, reorder_within(word, n, Database), fill = Database)) +
  geom_col(alpha = 0.8) +
  facet_wrap(~ Database, scales = "free_y") +
  scale_y_reordered() +
  labs(
    title = "Top 10 Most Frequent Words",
    x = "Word Frequency",
    y = NULL
  )
# Load the NRC lexicon (columns: word, sentiment) and print it for inspection.
nrc <- get_sentiments(lexicon = "nrc")
nrc
## # A tibble: 13,901 x 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # ... with 13,891 more rows
# Count, per database, how many words fall into each NRC sentiment category
# and plot the counts as horizontal bars, one facet per database.
poems_cleaned %>%
  # Explicit key: match on `word` only, and avoid the "Joining, by = ..."
  # message. A word listed under several NRC categories yields one row per
  # category, so it is counted once in each of them.
  inner_join(nrc, by = "word") %>%
  count(Database, sentiment, sort = TRUE) %>%
  ggplot(aes(y = reorder_within(sentiment, n, Database), x = n, fill = Database)) +
  geom_col(alpha = 0.8) +
  facet_wrap(~Database, scales = "free_y") +
  scale_y_reordered() +
  labs(title = "Number of Words Associated with Emotions",
       y = "Emotions",
       x = "Number of Words")
# Load the Bing lexicon (columns: word, sentiment) and print it for inspection.
bing <- get_sentiments(lexicon = "bing")
bing
## # A tibble: 6,786 x 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # ... with 6,776 more rows
# Stacked bar per database showing the share of Bing-negative versus
# Bing-positive words among the words found in the lexicon.
poems_cleaned %>%
  # Explicit key: match on `word` only, and avoid the "Joining, by = ..."
  # message.
  inner_join(bing, by = "word") %>%
  ggplot(aes(x = Database, fill = sentiment)) +
  # position = "fill" rescales each bar to height 1, so the y axis shows
  # proportions rather than counts.
  geom_bar(position = "fill") +
  labs(title = "Ratios of Negative and Positive Words",
       y = "Proportions",
       x = NULL)
# Load the AFINN lexicon (columns: word, value) and print it for inspection.
# The object is named `affin`; later code refers to it by that name.
affin <- get_sentiments(lexicon = "afinn")
affin
## # A tibble: 2,477 x 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # ... with 2,467 more rows
# Sum the AFINN scores of all matched words per database and plot the totals.
poems_cleaned %>%
  # Explicit key: match on `word` only, and avoid the "Joining, by = ..."
  # message. Words absent from the lexicon are dropped by the inner join.
  inner_join(affin, by = "word") %>%
  group_by(Database) %>%
  summarise(sentiment_score = sum(value)) %>%
  ungroup() %>%
  ggplot(aes(x = Database, y = sentiment_score)) +
  geom_col(fill = "midnightblue", alpha = 0.8) +
  labs(title = "Sum of Sentiment Scores of Words",
       x = NULL,
       y = "Sum of Sentiment Scores")