This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

# Load the manifesto spreadsheet from the working directory.
# NOTE(review): the sheet must provide at least `author` and `text` columns,
# since later chunks group by `author` and tokenize `text` -- confirm.
manifesto_text <- read_excel("manifestos.xlsx")

# Tokenize the `text` column into a `word` column (tidytext defaults).
manifesto_words <- unnest_tokens(manifesto_text, word, text)

# Per-author vocabulary summary, rendered as an interactive table:
#   num_words       - total tokens
#   lex_diversity   - number of distinct tokens
#   lexical_density - distinct tokens divided by total tokens
manifesto_words %>%
  group_by(author) %>%
  summarize(
    num_words = length(word),
    lex_diversity = length(unique(word)),
    lexical_density = lex_diversity / num_words
  ) %>%
  datatable()

Here’s a table of the manifestos with lexical density and diversity. Rodger was so wordy.

# Average token length (in characters) per author, longest first,
# rendered as an interactive table.
manifesto_words %>%
  group_by(author) %>%
  summarize(mean_word_length = mean(nchar(word))) %>%
  arrange(desc(mean_word_length)) %>%
  datatable()

Here’s a table of the mean length of the words.

# Histogram of token lengths (in characters), one facet per author.
# `fill` is mapped to `author` so that scale_fill_viridis_d() actually has an
# aesthetic to colour; without a fill mapping the scale is a no-op. The legend
# is hidden because the facet strips already name each author.
manifesto_words %>%
  mutate(word_length = nchar(word)) %>%
  ggplot(aes(word_length, fill = author)) +
  geom_histogram(binwidth = 1, show.legend = FALSE) +
  # Free y axes: the facets can differ in total word count.
  facet_wrap(vars(author), scales = "free_y") +
  labs(title = "Manifesto Word Length by Author") +
  scale_fill_viridis_d()

Breivik is just showing off.

# Most frequent non-stop-words per author, as one bar-chart facet per author.
manifesto_words %>%
  # Explicit key: never fall back to a natural join on whatever columns match.
  anti_join(stop_words, by = "word") %>%
  count(author, word, sort = TRUE) %>%
  group_by(author) %>%
  # Explicit weight column; top_n() keeps ties, so a facet may show > 5 bars.
  top_n(5, n) %>%
  ungroup() %>%
  # Order bars within each facet. Reordering `word` alone is global, so a word
  # shared by several authors would be placed by its mean count across them.
  # The "___<author>" suffix makes every (word, author) pair its own level.
  # Assumes no token itself contains "___".
  mutate(word_in_author = reorder(paste(word, author, sep = "___"), n)) %>%
  ggplot(aes(word_in_author, n, fill = author)) +
  geom_col(show.legend = FALSE) +
  # Strip the "___<author>" suffix again for the axis labels.
  scale_x_discrete(labels = function(key) sub("___.*$", "", key)) +
  labs(x = NULL, y = "Manifesto Most Common Words") +
  facet_wrap(vars(author), scales = "free") +
  scale_fill_viridis_d() +
  theme_minimal() +
  coord_flip()

These are so interesting–little poems from the insane. I align with Cho, to be honest.

# Count how often each word occurs per author (most frequent first).
manifesto_word_counts <- manifesto_text %>%
  unnest_tokens(word, text) %>%
  count(author, word, sort = TRUE)

# Total number of tokens per author.
total_words <- manifesto_word_counts %>%
  group_by(author) %>%
  summarize(total = sum(n))

# Attach each author's total to every (author, word) row. The join key is
# spelled out so the join cannot silently pick up other shared column names.
manifesto_word_counts <- left_join(
  manifesto_word_counts,
  total_words,
  by = "author"
)
# Add tf, idf and tf_idf columns, treating each author as one document.
manifesto_tf_idf <- bind_tf_idf(manifesto_word_counts, word, author, n)

# Highest tf-idf words per author, as one bar-chart facet per author.
manifesto_tf_idf %>%
  group_by(author) %>%
  # Rank explicitly by tf_idf instead of relying on it being the last column;
  # top_n() keeps ties, so a facet may show more than five bars.
  top_n(5, tf_idf) %>%
  ungroup() %>%
  # Order bars within each facet: the "___<author>" suffix makes every
  # (word, author) pair its own factor level, so a word shared by two authors
  # is ranked separately in each facet. Assumes no token contains "___".
  mutate(word_in_author = reorder(paste(word, author, sep = "___"), tf_idf)) %>%
  ggplot(aes(word_in_author, tf_idf, fill = author)) +
  geom_col(show.legend = FALSE) +
  # Strip the "___<author>" suffix again for the axis labels.
  scale_x_discrete(labels = function(key) sub("___.*$", "", key)) +
  labs(title = "Manifesto Distinctive Words", x = NULL, y = "tf_idf") +
  facet_wrap(~author, scales = "free") +
  coord_flip() +
  theme_minimal() +
  scale_fill_viridis_d()

Here’s a graph of the most distinctive words of each manifesto. Rodger is a crybaby. Grow up.

LS0tCnRpdGxlOiAiQVByYXR0IFRleHQgQW5hbHlzaXMgQXNzaWdubWVudCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgoKYGBge3J9Cm1hbmlmZXN0b190ZXh0IDwtIHJlYWRfZXhjZWwoIm1hbmlmZXN0b3MueGxzeCIpCmBgYAoKYGBge3J9Cm1hbmlmZXN0b193b3JkcyA8LSBtYW5pZmVzdG9fdGV4dCAlPiUgCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCB0ZXh0KQoKbWFuaWZlc3RvX3dvcmRzICU+JSAKICBncm91cF9ieShhdXRob3IpICU+JSAKICBzdW1tYXJpemUobnVtX3dvcmRzID0gbigpLCAKICAgICAgICAgICAgbGV4X2RpdmVyc2l0eSA9IG5fZGlzdGluY3Qod29yZCksIAogICAgICAgICAgICBsZXhpY2FsX2RlbnNpdHkgPSBuX2Rpc3RpbmN0KHdvcmQpL24oKSkgJT4lIAogIGRhdGF0YWJsZSgpCmBgYApIZXJlJ3MgYSB0YWJsZSBvZiB0aGUgbWFuaWZlc3RvcyB3aXRoIGxleGljYWwgZGVuc2l0eSBhbmQgZGl2ZXJzaXR5LiBSb2dlciB3YXMgc28gd29yZHkuIAoKYGBge3J9Cm1hbmlmZXN0b193b3JkcyAlPiUgCiAgZ3JvdXBfYnkoYXV0aG9yKSAlPiUgCiAgbXV0YXRlKHdvcmRfbGVuZ3RoID0gbmNoYXIod29yZCkpICU+JSAKICBzdW1tYXJpemUobWVhbl93b3JkX2xlbmd0aCA9IG1lYW4od29yZF9sZW5ndGgpKSAlPiUgCiAgYXJyYW5nZSgtbWVhbl93b3JkX2xlbmd0aCkgJT4lIAogIGRhdGF0YWJsZSgpCmBgYApIZXJlJ3MgYSB0YWJsZSBvZiB0aGUgbWVhbiBsZW5ndGggb2YgdGhlIHdvcmRzLiAKCmBgYHtyfQptYW5pZmVzdG9fd29yZHMgJT4lIAogIG11dGF0ZSh3b3JkX2xlbmd0aCA9IG5jaGFyKHdvcmQpKSAlPiUgCiAgZ2dwbG90KGFlcyh3b3JkX2xlbmd0aCkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEpICsKICBmYWNldF93cmFwKHZhcnMoYXV0aG9yKSwgc2NhbGVzID0gImZyZWVfeSIpICsKICBsYWJzKHRpdGxlID0gIk1hbmlmZXN0byBXb3JkIExlbmd0aCBieSBBdXRob3IiKSArCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKQpgYGAKQnJlaXZpayBpcyBqdXN0IHNob3dpbmcgb2ZmLiAKCmBgYHtyfQptYW5pZmVzdG9fd29yZHMgJT4lIAogIGFudGlfam9pbihzdG9wX3dvcmRzKSAlPiUgCiAgZ3JvdXBfYnkoYXV0aG9yKSAlPiUgCiAgY291bnQod29yZCwgc29ydCA9IFQpICU+JSAKICB0b3Bfbig1KSAlPiUgCiAgdW5ncm91cCgpICU+JSAKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JSAKICBnZ3Bsb3QoYWVzKHdvcmQsIG4sIGZpbGwgPSBhdXRob3IpKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGxhYnMoeCA9IE5VTEwsIHkgPSAiTWFuaWZlc3RvIE1vc3QgQ29tbW9uIFdvcmRzIikgKwogIGZhY2V0
X3dyYXAodmFycyhhdXRob3IpLCBzY2FsZXMgPSAiZnJlZSIpICsKICBzY2FsZV9maWxsX3ZpcmlkaXNfZCgpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIGNvb3JkX2ZsaXAoKQpgYGAKVGhlc2UgYXJlIHNvIGludGVyZXN0aW5nLS1saXR0bGUgcG9lbXMgZnJvbSB0aGUgaW5zYW5lLiBJIGFsaWduIHdpdGggQ2hvLCB0byBiZSBob25lc3QuIAoKYGBge3J9Cm1hbmlmZXN0b193b3JkX2NvdW50cyA8LSBtYW5pZmVzdG9fdGV4dCAlPiUgCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCB0ZXh0KSAlPiUgCiAgY291bnQoYXV0aG9yLCB3b3JkLCBzb3J0ID0gVFJVRSkKCnRvdGFsX3dvcmRzIDwtIG1hbmlmZXN0b193b3JkX2NvdW50cyAlPiUgCiAgZ3JvdXBfYnkoYXV0aG9yKSAlPiUgCiAgc3VtbWFyaXplKHRvdGFsID0gc3VtKG4pKQoKbWFuaWZlc3RvX3dvcmRfY291bnRzIDwtIGxlZnRfam9pbihtYW5pZmVzdG9fd29yZF9jb3VudHMsIHRvdGFsX3dvcmRzKQpgYGAKCmBgYHtyfQptYW5pZmVzdG9fdGZfaWRmIDwtIG1hbmlmZXN0b193b3JkX2NvdW50cyAlPiUgCiAgYmluZF90Zl9pZGYod29yZCwgYXV0aG9yLCBuKQoKbWFuaWZlc3RvX3RmX2lkZiAlPiUgCiAgYXJyYW5nZSgtdGZfaWRmKSAlPiUgCiAgbXV0YXRlKHdvcmQgPSBmYWN0b3Iod29yZCwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkKSkpKSAlPiUgCiAgZ3JvdXBfYnkoYXV0aG9yKSAlPiUgCiAgdG9wX24oNSkgJT4lIAogIGdncGxvdChhZXMod29yZCwgdGZfaWRmLCBmaWxsID0gYXV0aG9yKSkgKyAKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgbGFicyh4ID0gTlVMTCwgeSA9ICJ0Zl9pZGYiKSArIAogIGZhY2V0X3dyYXAofmF1dGhvciwgc2NhbGVzID0gImZyZWUiKSArIAogIGNvb3JkX2ZsaXAoKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICBzY2FsZV9maWxsX3ZpcmlkaXNfZCgpICsKICBsYWJzKHRpdGxlID0gIk1hbmlmZXN0byBEaXN0aW5jdGl2ZSBXb3JkcyIpCmBgYApIZXJlJ3MgYSBncmFwaCBvZiB0aGUgbW9zdCBkaXN0aW5jdGl2ZSB3b3JkcyBvZiBlYWNoIG1hbmlmZXN0LiBSb2RnZXIgaXMgYSBjcnliYWJ5LiBHcm93IHVwLiAK