Installing the necessary packages

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: RColorBrewer
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v stringr 1.4.0
## v tidyr   1.2.0     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
## Package version: 3.2.0
## Unicode version: 13.0
## ICU version: 69.1
## Parallel computing: 4 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:tm':
## 
##     stopwords
## The following objects are masked from 'package:NLP':
## 
##     meta, meta<-

Download the first book for analysis using its book ID number on Project Gutenberg. The first book I want to work with is the one that started it all, The Wealth of Nations by Adam Smith. I am first looking up the ID of each book I might use in the Project Gutenberg metadata.

# Look up the book's Project Gutenberg ID by its exact catalogue title
filter(
  gutenberg_metadata,
  title == "An Inquiry into the Nature and Causes of the Wealth of Nations"
)
## # A tibble: 1 x 8
##   gutenberg_id title    author gutenberg_autho~ language gutenberg_books~ rights
##          <int> <chr>    <chr>             <int> <chr>    <chr>            <chr> 
## 1         3300 An Inqu~ Smith~             1158 en       Harvard Classic~ Publi~
## # ... with 1 more variable: has_text <lgl>
# Metadata row for "The Economic Consequences of the Peace"
filter(gutenberg_metadata, title == "The Economic Consequences of the Peace")
## # A tibble: 1 x 8
##   gutenberg_id title    author gutenberg_autho~ language gutenberg_books~ rights
##          <int> <chr>    <chr>             <int> <chr>    <chr>            <chr> 
## 1        15776 The Eco~ Keyne~             6280 en       World War I/Bes~ Publi~
## # ... with 1 more variable: has_text <lgl>
# Metadata row for "Lombard Street: A Description of the Money Market"
filter(
  gutenberg_metadata,
  title == "Lombard Street: A Description of the Money Market"
)
## # A tibble: 1 x 8
##   gutenberg_id title    author gutenberg_autho~ language gutenberg_books~ rights
##          <int> <chr>    <chr>             <int> <chr>    <chr>            <chr> 
## 1         4359 Lombard~ Bageh~             1461 en       <NA>             Publi~
## # ... with 1 more variable: has_text <lgl>
# Metadata row for "The History of Currency, 1252 to 1896"
filter(gutenberg_metadata, title == "The History of Currency, 1252 to 1896")
## # A tibble: 1 x 8
##   gutenberg_id title    author gutenberg_autho~ language gutenberg_books~ rights
##          <int> <chr>    <chr>             <int> <chr>    <chr>            <chr> 
## 1        38381 The His~ Shaw,~            39433 en       <NA>             Publi~
## # ... with 1 more variable: has_text <lgl>

For now I am going to work with two books: The Wealth of Nations and The Economic Consequences of the Peace.

 # Download the text of both books by Project Gutenberg ID
 # (3300 and 15776, as returned by the metadata lookups), from an explicit
 # mirror.
 wealth_of_nations <- gutenberg_download(
   3300,
   mirror = "http://mirrors.xmission.com/gutenberg/"
 )

 consequence_of_peace <- gutenberg_download(
   15776,
   mirror = "http://mirrors.xmission.com/gutenberg/"
 )
## cols(
##   gutenberg_id = col_double(),
##   text = col_character()
## )
## Rows: 34546 Columns: 2
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): text
## dbl (1): gutenberg_id
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 34,546 x 2
##    gutenberg_id text                                                          
##           <dbl> <chr>                                                         
##  1         3300 An Inquiry into the Nature and Causes of the Wealth of Nations
##  2         3300 <NA>                                                          
##  3         3300 <NA>                                                          
##  4         3300 <NA>                                                          
##  5         3300 by Adam Smith                                                 
##  6         3300 <NA>                                                          
##  7         3300 <NA>                                                          
##  8         3300 <NA>                                                          
##  9         3300 <NA>                                                          
## 10         3300 <NA>                                                          
## # ... with 34,536 more rows
## Rows: 7284 Columns: 2
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): text
## dbl (1): gutenberg_id
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 7,284 x 2
##    gutenberg_id text                                  
##           <dbl> <chr>                                 
##  1        15776 THE ECONOMIC CONSEQUENCES OF THE PEACE
##  2        15776 <NA>                                  
##  3        15776 by                                    
##  4        15776 <NA>                                  
##  5        15776 JOHN MAYNARD KEYNES, C.B.             
##  6        15776 Fellow of King's College, Cambridge   
##  7        15776 <NA>                                  
##  8        15776 New York                              
##  9        15776 Harcourt, Brace and Howe              
## 10        15776 <NA>                                  
## # ... with 7,274 more rows

Cleanup

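I need to do a bit of cleanup on both of these documents. I start by dealing with the missing (NA) lines, then trim the front matter and number the books and chapters.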

# Blank out missing lines in both books.
#
# A line is blanked only when its whole value is missing. Do not use
# str_remove_all(text, "NA") for this: it deletes the letters "NA" from inside
# real words as well (e.g. "NATURALLY" becomes "TURALLY").
# Rows are replaced rather than dropped so that the row positions used by
# slice() in the cleanup step stay the same.
# The literal "NA" comparison is a precaution in case the text was loaded with
# missing values stored as the string "NA" -- TODO confirm against the data.

# Blank out missing lines in Wealth of Nations
wealth_of_nations <-
  wealth_of_nations %>%
  mutate(text = if_else(is.na(text) | text == "NA", "", text))

# Blank out missing lines in Consequence of Peace
consequence_of_peace <-
  consequence_of_peace %>%
  mutate(text = if_else(is.na(text) | text == "NA", "", text))

wealth_of_nations
## # A tibble: 34,546 x 2
##    gutenberg_id text                                                            
##           <int> <chr>                                                           
##  1         3300 "An Inquiry into the Nature and Causes of the Wealth of Nations"
##  2         3300 ""                                                              
##  3         3300 ""                                                              
##  4         3300 ""                                                              
##  5         3300 "by Adam Smith"                                                 
##  6         3300 ""                                                              
##  7         3300 ""                                                              
##  8         3300 ""                                                              
##  9         3300 ""                                                              
## 10         3300 ""                                                              
## # ... with 34,536 more rows
wealth_of_nations_clean <-
  wealth_of_nations %>%
  # Skip the front matter: the book proper begins at row 187
  slice(187:n()) %>%
  # Discard rows whose text is missing
  drop_na(text) %>%
  # A row starting with "BOOK" / "CHAPTER" opens a new book / chapter.
  # cumsum() over those TRUE/FALSE flags gives a running count, so every row
  # is labelled with the book and chapter it belongs to. Chapter numbers keep
  # counting up across books rather than restarting.
  mutate(
    book_number = cumsum(str_detect(text, "^BOOK")),
    chapter_number = cumsum(str_detect(text, "^CHAPTER"))
  ) %>%
  # The Gutenberg ID is not needed from here on
  select(-gutenberg_id)

consequence_of_peace_clean <-
  consequence_of_peace %>%
  # Skip the front matter: the book proper begins at row 63
  slice(63:n()) %>%
  # Discard rows whose text is missing
  drop_na(text) %>%
  # A row starting with "CHAPTER" opens a new chapter. cumsum() over that
  # TRUE/FALSE flag gives a running count, so every row is labelled with the
  # chapter it belongs to (this book gets no book number).
  mutate(chapter_number = cumsum(str_detect(text, "^CHAPTER"))) %>%
  # The Gutenberg ID is not needed from here on
  select(-gutenberg_id)
wealth_of_nations_clean
## # A tibble: 34,360 x 3
##    text                                               book_number chapter_number
##    <chr>                                                    <int>          <int>
##  1 "BOOK I."                                                    1              0
##  2 "OF THE CAUSES OF IMPROVEMENT IN THE"                        1              0
##  3 "PRODUCTIVE POWERS OF LABOUR, AND OF THE ORDER AC~           1              0
##  4 "TURALLY DISTRIBUTED AMONG THE DIFFERENT RANKS OF~           1              0
##  5 "    "                                                       1              0
##  6 ""                                                           1              0
##  7 ""                                                           1              0
##  8 "CHAPTER I."                                                 1              1
##  9 "OF THE DIVISION OF LABOUR."                                 1              1
## 10 ""                                                           1              1
## # ... with 34,350 more rows
# Word frequencies for Wealth of Nations, most frequent first.
word_frequencies_won <- wealth_of_nations_clean %>%
  # unnest_tokens() from tidytext splits the text so that each word
  # gets its own row
  unnest_tokens(word, text) %>%
  # Project Gutenberg texts mark emphasis with underscores (e.g. "_any_").
  # Keep only the first run of letters/apostrophes so that "_any_" is counted
  # together with "any". Tokens containing no letters at all (e.g. numbers)
  # have nothing to extract and become NA.
  mutate(word = str_extract(word, "[a-z']+")) %>%
  # Remove stop words. This runs after the extraction so that emphasised stop
  # words such as "_any_" are removed too. The join key is given explicitly.
  anti_join(stop_words, by = "word") %>%
  # Count all the words
  count(word, sort = TRUE)
## Joining, by = "word"
# What does this look like?
word_frequencies_won
## # A tibble: 8,966 x 2
##    word         n
##    <chr>    <int>
##  1 <NA>      2786
##  2 price     1259
##  3 country   1235
##  4 labour     989
##  5 trade      970
##  6 produce    934
##  7 quantity   796
##  8 people     775
##  9 money      770
## 10 land       717
## # ... with 8,956 more rows

A Little Exploration of the two books

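So now I have this pesky NA popping up. It comes from tokens that contain no letters at all (numbers, for example), for which str_extract() has nothing to return. I am going to remove it because I definitely don’t want to count it.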

# Remove the NA row from the word frequencies.
# drop_na(word) names the column that actually holds the missing value.
# na.omit() ignores an extra argument such as c("n") and drops any row
# containing an NA in any column.
word_frequencies_won <- word_frequencies_won %>%
  drop_na(word)
word_frequencies_won
## # A tibble: 8,965 x 2
##    word         n
##    <chr>    <int>
##  1 price     1259
##  2 country   1235
##  3 labour     989
##  4 trade      970
##  5 produce    934
##  6 quantity   796
##  7 people     775
##  8 money      770
##  9 land       717
## 10 revenue    685
## # ... with 8,955 more rows
# Bar chart of the 15 most frequent words in Wealth of Nations
word_frequencies_won %>%
  # Keep the 15 rows with the largest n (ties included); the ordering column
  # is named explicitly instead of being guessed as with top_n()
  slice_max(n, n = 15) %>%
  # Freeze the current row order as the factor level order so the bars are
  # drawn by frequency rather than alphabetically
  mutate(word = fct_inorder(word)) %>%
  ggplot(aes(x = n, y = word)) +
  geom_col()
## Selecting by n

I’m going to do the same count for The Economic Consequences of the Peace as I did for The Wealth of Nations above.

# Word frequencies for The Economic Consequences of the Peace, most frequent
# first.
word_frequencies_cop <- consequence_of_peace_clean %>%
  # unnest_tokens() from tidytext splits the text so that each word
  # gets its own row
  unnest_tokens(word, text) %>%
  # Project Gutenberg texts mark emphasis with underscores (e.g. "_any_").
  # Keep only the first run of letters/apostrophes so that "_any_" is counted
  # together with "any". Tokens containing no letters at all (e.g. numbers)
  # have nothing to extract and become NA.
  mutate(word = str_extract(word, "[a-z']+")) %>%
  # Remove stop words. This runs after the extraction so that emphasised stop
  # words such as "_any_" are removed too. The join key is given explicitly.
  anti_join(stop_words, by = "word") %>%
  # Count all the words
  count(word, sort = TRUE)
## Joining, by = "word"
# Remove the NA row from the word frequencies.
# drop_na(word) names the column that actually holds the missing value.
# na.omit() ignores an extra argument such as c("n") and drops any row
# containing an NA in any column.
word_frequencies_cop <- word_frequencies_cop %>%
  drop_na(word)
word_frequencies_cop
## # A tibble: 5,805 x 2
##    word           n
##    <chr>      <int>
##  1 germany      417
##  2 german       245
##  3 war          224
##  4 treaty       170
##  5 allies       156
##  6 europe       149
##  7 france       146
##  8 reparation   141
##  9 economic     132
## 10 commission   124
## # ... with 5,795 more rows
# Bar chart of the 15 most frequent words in Economic Consequences of the Peace
word_frequencies_cop %>%
  # Keep the 15 rows with the largest n (ties included); the ordering column
  # is named explicitly instead of being guessed as with top_n()
  slice_max(n, n = 15) %>%
  # Freeze the current row order as the factor level order so the bars are
  # drawn by frequency rather than alphabetically
  mutate(word = fct_inorder(word)) %>%
  ggplot(aes(x = n, y = word)) +
  geom_col()
## Selecting by n

The Economic Consequences of the Peace was clearly written at a different time than The Wealth of Nations, and it seems to focus on the countries affected most by WWI. The Wealth of Nations doesn’t have a war to talk about, but the two books do share a relationship in talking about the economy, trade, and money.

# Tokenise the cleaned text into overlapping two-word sequences (bigrams),
# one bigram per row
wealth_of_nations_bigrams <- unnest_tokens(
  wealth_of_nations_clean,
  bigram, text,
  token = "ngrams", n = 2
)
wealth_of_nations_bigrams
## # A tibble: 351,769 x 3
##    book_number chapter_number bigram           
##          <int>          <int> <chr>            
##  1           1              0 book i           
##  2           1              0 of the           
##  3           1              0 the causes       
##  4           1              0 causes of        
##  5           1              0 of improvement   
##  6           1              0 improvement in   
##  7           1              0 in the           
##  8           1              0 productive powers
##  9           1              0 powers of        
## 10           1              0 of labour        
## # ... with 351,759 more rows

So my earlier code to remove the NA apparently didn’t work on the whole data frame. I am going to go step by step here because I’m not really sure why it didn’t solve the issue. For the word frequencies there were well over 2,000 cases of NA, and I know from how the data frame is set up that NA will also occur many times in the bigrams, which I don’t want.

# Split each bigram at the space into its first (w1) and second (w2) word
wealth_of_nations_bigrams <- separate(
  wealth_of_nations_bigrams,
  bigram,
  into = c("w1", "w2"),
  sep = " "
)
wealth_of_nations_bigrams
## # A tibble: 351,769 x 4
##    book_number chapter_number w1          w2         
##          <int>          <int> <chr>       <chr>      
##  1           1              0 book        i          
##  2           1              0 of          the        
##  3           1              0 the         causes     
##  4           1              0 causes      of         
##  5           1              0 of          improvement
##  6           1              0 improvement in         
##  7           1              0 in          the        
##  8           1              0 productive  powers     
##  9           1              0 powers      of         
## 10           1              0 of          labour     
## # ... with 351,759 more rows
# Load the stop_words data set
data(stop_words)

# Keep only bigrams in which neither word is a stop word.
# %in% is FALSE for NA, so rows with missing words pass through this step.
wealth_of_nations_bigrams <- wealth_of_nations_bigrams %>%
  filter(
    !(w1 %in% stop_words$word),
    !(w2 %in% stop_words$word)
  )

wealth_of_nations_bigrams
## # A tibble: 27,209 x 4
##    book_number chapter_number w1         w2         
##          <int>          <int> <chr>      <chr>      
##  1           1              0 productive powers     
##  2           1              0 turally    distributed
##  3           1              0 <NA>       <NA>       
##  4           1              0 <NA>       <NA>       
##  5           1              0 <NA>       <NA>       
##  6           1              1 <NA>       <NA>       
##  7           1              1 <NA>       <NA>       
##  8           1              1 <NA>       <NA>       
##  9           1              1 productive powers     
## 10           1              1 skill      dexterity  
## # ... with 27,199 more rows
# Drop bigrams in which either word is missing
wealth_of_nations_bigrams <- drop_na(wealth_of_nations_bigrams, w1, w2)
wealth_of_nations_bigrams
## # A tibble: 24,554 x 4
##    book_number chapter_number w1         w2          
##          <int>          <int> <chr>      <chr>       
##  1           1              0 productive powers      
##  2           1              0 turally    distributed 
##  3           1              1 productive powers      
##  4           1              1 skill      dexterity   
##  5           1              1 easily     understood  
##  6           1              1 carried    furthest    
##  7           1              1 trifling   manufactures
##  8           1              1 single     branch      
##  9           1              1 trifling   nature      
## 10           1              1 trifling   manufacture 
## # ... with 24,544 more rows
# Glue the two words back together into a single space-separated bigram column
wealth_of_nations_bigrams <- unite(
  wealth_of_nations_bigrams,
  bigram, w1, w2,
  sep = " "
)
wealth_of_nations_bigrams
## # A tibble: 24,554 x 3
##    book_number chapter_number bigram               
##          <int>          <int> <chr>                
##  1           1              0 productive powers    
##  2           1              0 turally distributed  
##  3           1              1 productive powers    
##  4           1              1 skill dexterity      
##  5           1              1 easily understood    
##  6           1              1 carried furthest     
##  7           1              1 trifling manufactures
##  8           1              1 single branch        
##  9           1              1 trifling nature      
## 10           1              1 trifling manufacture 
## # ... with 24,544 more rows

Just wondering now if “trifling” is in any of the dictionaries for when I do a sentiment analysis. I hope it is!

# Count how often each bigram occurs, most frequent first
bigram_frequencies_won <- wealth_of_nations_bigrams %>%
  count(bigram, sort = TRUE)

# Bar chart of the 15 most frequent bigrams
bigram_frequencies_won %>%
  # Keep the 15 rows with the largest n (ties included); the ordering column
  # is named explicitly instead of being guessed as with top_n()
  slice_max(n, n = 15) %>%
  # Freeze the current row order as the factor level order
  mutate(bigram = fct_inorder(bigram)) %>%
  ggplot(aes(x = n, y = bigram)) +
  geom_col() +
  # Counts are mapped to the x axis and bigrams to the y axis, so "Count"
  # labels x and the self-explanatory bigram axis is left unlabelled
  labs(x = "Count", y = NULL,
       title = "15 most frequent bigrams in Wealth of Nations")
## Selecting by n

# Bigrams for Economic Consequences of the Peace, in one pipeline:
# tokenise into bigrams, split into two words, drop bigrams containing a stop
# word or a missing word, then glue the words back together.
consequence_of_peace_bigrams <- consequence_of_peace_clean %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  separate(bigram, into = c("w1", "w2"), sep = " ") %>%
  filter(
    !(w1 %in% stop_words$word),
    !(w2 %in% stop_words$word)
  ) %>%
  drop_na(w1, w2) %>%
  unite(bigram, w1, w2, sep = " ")

consequence_of_peace_bigrams
## # A tibble: 7,149 x 2
##    chapter_number bigram                
##             <int> <chr>                 
##  1              0 intensely unusual     
##  2              0 unusual unstable      
##  3              0 unstable complicated  
##  4              0 complicated unreliable
##  5              0 unreliable temporary  
##  6              0 temporary nature      
##  7              0 economic organization 
##  8              0 western europe        
##  9              0 half century          
## 10              0 late advantages       
## # ... with 7,139 more rows
# Count how often each bigram occurs, most frequent first
bigram_frequencies_cop <- consequence_of_peace_bigrams %>%
  count(bigram, sort = TRUE)

# Bar chart of the 15 most frequent bigrams
bigram_frequencies_cop %>%
  # Keep the 15 rows with the largest n (ties included); the ordering column
  # is named explicitly instead of being guessed as with top_n()
  slice_max(n, n = 15) %>%
  # Freeze the current row order as the factor level order
  mutate(bigram = fct_inorder(bigram)) %>%
  ggplot(aes(x = n, y = bigram)) +
  geom_col() +
  # Counts are mapped to the x axis and bigrams to the y axis, so "Count"
  # labels x and the self-explanatory bigram axis is left unlabelled
  labs(x = "Count", y = NULL,
       title = "15 most frequent bigrams in Economic Consequence of Peace")
## Selecting by n

I’m thinking that with the bigrams and trigrams from The Wealth of Nations and The Economic Consequences of the Peace I can focus in on some particulars that both authors may find important, based on how often they use them. How connected are the two economists? Are the same subjects from Adam Smith’s time still relevant in Keynes’s time?

# Frequency table of the united bigram strings, most frequent first
bigram_counts_won <- count(wealth_of_nations_bigrams, bigram, sort = TRUE)
bigram_counts_won
## # A tibble: 14,131 x 2
##    bigram                n
##    <chr>             <int>
##  1 annual produce      148
##  2 foreign trade       102
##  3 money price          89
##  4 home market          85
##  5 rude produce         75
##  6 0 0                  65
##  7 productive labour    60
##  8 surplus produce      60
##  9 thousand pounds      60
## 10 east indies          56
## # ... with 14,121 more rows
# Rebuild the bigram counts from the cleaned text, this time keeping the two
# words in separate columns (w1, w2) instead of one united string
bigram_counts_won <- wealth_of_nations_clean %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  separate(bigram, into = c("w1", "w2"), sep = " ") %>%
  filter(
    !(w1 %in% stop_words$word),
    !(w2 %in% stop_words$word)
  ) %>%
  drop_na(w1, w2) %>%
  count(w1, w2, sort = TRUE)

bigram_counts_won
## # A tibble: 14,131 x 3
##    w1         w2          n
##    <chr>      <chr>   <int>
##  1 annual     produce   148
##  2 foreign    trade     102
##  3 money      price      89
##  4 home       market     85
##  5 rude       produce    75
##  6 0          0          65
##  7 productive labour     60
##  8 surplus    produce    60
##  9 thousand   pounds     60
## 10 east       indies     56
## # ... with 14,121 more rows
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# Build a graph from the word pairs that occur at least 56 times; each row of
# the filtered counts becomes one edge
bigram_graph_won <- graph_from_data_frame(
  filter(bigram_counts_won, n >= 56)
)

bigram_graph_won
## IGRAPH 7ce17ae DN-- 17 10 -- 
## + attr: name (v/c), n (e/n)
## + edges from 7ce17ae (vertex names):
##  [1] annual    ->produce foreign   ->trade   money     ->price  
##  [4] home      ->market  rude      ->produce 0         ->0      
##  [7] productive->labour  surplus   ->produce thousand  ->pounds 
## [10] east      ->indies
# Network plot of the most frequent Wealth of Nations bigrams.
# The "fr" layout starts from random node positions, so fix the seed to get
# the same picture every time the document is knitted.
set.seed(2022)
ggraph(bigram_graph_won, layout = "fr") +
  # Edge transparency is mapped to the bigram count n; arrows show the
  # direction of each edge
  geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
                 arrow = grid::arrow(type = "closed", length = unit(2, "mm")),
                 end_cap = circle(1, "mm")) +
  geom_node_point(color = "lightblue", size = 2) +
  geom_node_text(aes(label = name), size = 2) +
  theme_void()

The above visual is very interesting. I think it depicts the relationship between the words in the bigrams and how they are used together. I will also do this for the other book. For right now it seems as though the two books cover different subjects. I want to do a little more digging before I do a sentiment analysis on anything. Let’s see how it looks for The Economic Consequences of the Peace.

# Count the word pairs in Economic Consequences of the Peace, keeping the two
# words in separate columns (w1, w2): tokenise into bigrams, split into two
# words, drop pairs containing a stop word or a missing word, then count.
bigram_counts_cop <- consequence_of_peace_clean %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  separate(bigram, c("w1", "w2"), sep = " ") %>%
  filter(
    !w1 %in% stop_words$word,
    !w2 %in% stop_words$word
  ) %>%
  filter(!is.na(w1), !is.na(w2)) %>%
  count(w1, w2, sort = TRUE)



# Build a graph from the word pairs that occur at least 10 times; each row of
# the filtered counts becomes one edge
bigram_graph_cop <- graph_from_data_frame(
  filter(bigram_counts_cop, n >= 10)
)
bigram_graph_cop
## IGRAPH 7dfe4ab DN-- 37 20 -- 
## + attr: name (v/c), n (e/n)
## + edges from 7dfe4ab (vertex names):
##  [1] reparation->commission  pre       ->war         german    ->government 
##  [4] upper     ->silesia     prime     ->minister    united    ->kingdom    
##  [7] alsace    ->lorraine    austria   ->hungary     germany's ->capacity   
## [10] economic  ->life        reparation->chapter     raw       ->materials  
## [13] central   ->europe      german    ->nationals   allied    ->countries  
## [16] allied    ->governments export    ->trade       foreign   ->securities 
## [19] inter     ->ally        iron      ->ore
# Network plot of the most frequent Economic Consequences of the Peace bigrams.
# The "fr" layout starts from random node positions, so fix the seed to get
# the same picture every time the document is knitted.
set.seed(2022)
ggraph(bigram_graph_cop, layout = "fr") +
  # Edge transparency is mapped to the bigram count n; arrows show the
  # direction of each edge
  geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
                 arrow = grid::arrow(type = "closed", length = unit(2, "mm")),
                 end_cap = circle(1, "mm")) +
  geom_node_point(color = "lightblue", size = 2) +
  geom_node_text(aes(label = name), size = 2) +
  theme_void()

# Trigrams for Wealth of Nations: tokenise into three-word sequences, split
# into w1/w2/w3, drop trigrams containing a stop word or a missing word, then
# glue the words back together.
wealth_of_nations_trigrams <- wealth_of_nations_clean %>%
  unnest_tokens(trigram, text, token = "ngrams", n = 3) %>%
  separate(trigram, c("w1", "w2", "w3"), sep = " ") %>%
  # All three positions are checked; leaving w3 unchecked lets trigrams
  # ending in a stop word ("productive powers of") through
  filter(!w1 %in% stop_words$word) %>%
  filter(!w2 %in% stop_words$word) %>%
  filter(!w3 %in% stop_words$word) %>%
  filter(!is.na(w1)) %>%
  filter(!is.na(w2)) %>%
  filter(!is.na(w3)) %>%
  unite(trigram, w1, w2, w3, sep = " ")

wealth_of_nations_trigrams
## # A tibble: 22,125 x 3
##    book_number chapter_number trigram                    
##          <int>          <int> <chr>                      
##  1           1              0 productive powers of       
##  2           1              0 turally distributed among  
##  3           1              1 productive powers of       
##  4           1              1 skill dexterity and        
##  5           1              1 easily understood by       
##  6           1              1 carried furthest in        
##  7           1              1 trifling manufactures which
##  8           1              1 single branch though       
##  9           1              1 trifling nature the        
## 10           1              1 trifling manufacture but   
## # ... with 22,115 more rows
# Count how often each trigram occurs, most frequent first
trigram_frequencies_won <- wealth_of_nations_trigrams %>%
  count(trigram, sort = TRUE)

# Bar chart of the 15 most frequent trigrams
trigram_frequencies_won %>%
  # Keep the 15 rows with the largest n (ties included); the ordering column
  # is named explicitly instead of being guessed as with top_n()
  slice_max(n, n = 15) %>%
  # Freeze the current row order as the factor level order
  mutate(trigram = fct_inorder(trigram)) %>%
  ggplot(aes(x = n, y = trigram)) +
  geom_col() +
  # Counts are mapped to the x axis and trigrams to the y axis, so "Count"
  # labels x and the self-explanatory trigram axis is left unlabelled
  labs(x = "Count", y = NULL,
       title = "15 most frequent trigrams in Wealth of Nations")
## Selecting by n

# Trigrams for Economic Consequences of the Peace: tokenise into three-word
# sequences, split into w1/w2/w3, drop trigrams containing a stop word or a
# missing word, then glue the words back together.
consequence_of_peace_trigrams <- consequence_of_peace_clean %>%
  unnest_tokens(trigram, text, token = "ngrams", n = 3) %>%
  separate(trigram, c("w1", "w2", "w3"), sep = " ") %>%
  # All three positions are checked; leaving w3 unchecked lets trigrams
  # ending in a stop word through
  filter(!w1 %in% stop_words$word) %>%
  filter(!w2 %in% stop_words$word) %>%
  filter(!w3 %in% stop_words$word) %>%
  filter(!is.na(w1)) %>%
  filter(!is.na(w2)) %>%
  filter(!is.na(w3)) %>%
  unite(trigram, w1, w2, w3, sep = " ")

# Count how often each trigram occurs, most frequent first
trigram_frequencies_cop <- consequence_of_peace_trigrams %>%
  count(trigram, sort = TRUE)

# Bar chart of the 15 most frequent trigrams
trigram_frequencies_cop %>%
  # Keep the 15 rows with the largest n (ties included); the ordering column
  # is named explicitly instead of being guessed as with top_n()
  slice_max(n, n = 15) %>%
  # Freeze the current row order as the factor level order
  mutate(trigram = fct_inorder(trigram)) %>%
  ggplot(aes(x = n, y = trigram)) +
  geom_col() +
  # Counts are mapped to the x axis and trigrams to the y axis, so "Count"
  # labels x and the self-explanatory trigram axis is left unlabelled
  labs(x = "Count", y = NULL,
       title = "15 most frequent trigrams in Consequences of the Peace")
## Selecting by n