- This code reads the manifesto data.
manifestos <- read_excel("manifestos.xlsx")
manifestos
This code unnests the words in the manifestos, which separates the words into individual rows.
manifestos <- manifestos %>%
unnest_tokens(word, text)
manifestos
- This code finds the lexical diversity, or use of distinct words, the lexical density, which finds what proportion of words are distinct, and the number of words in each manifesto.
manifestos %>%
group_by(author) %>%
summarise(num_words = n(),
lex_diversity = n_distinct(word),
lex_density = n_distinct(word)/n())
- This code finds the mean word length for each manifesto and displays it in a table.
manifestos %>%
group_by(author) %>%
mutate(word_length = nchar(word)) %>%
summarize(mean_word_length = mean(word_length)) %>%
arrange(-mean_word_length)
- This code finds the length of the words in each manifesto and creates graphs for each document.
manifestos %>%
mutate(word_length = nchar(word)) %>%
ggplot(aes(word_length)) +
geom_histogram(binwidth = 1) +
facet_wrap(vars(author), scales = "free_y") +
theme_minimal() +
labs(x = "Word Lengths", y = "Words", title = "Manifesto Word Lengths by Author")

- This code removes the stop words, like “the”, “and”, “you”, from each manifesto so only the distinct words are counted.
manifestos %>%
anti_join(stop_words)
Joining, by = "word"
This code finds the top five most common words in each manifesto, without the stop words, and creates little graphs to disply them.
manifestos %>%
anti_join(stop_words) %>%
group_by(author) %>%
count(word, sort = T) %>%
top_n(5) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = author)) +
geom_col(show.legend = FALSE) +
labs(x = NULL, y = "Most common words") +
facet_wrap(vars(author), scales = "free") +
scale_fill_viridis_d() +
labs(title = "Most Common Words in Each Manifesto") +
theme_minimal() +
coord_flip()
Joining, by = "word"
Selecting by n

- The code below counts and combines the words in each manifesto and the common words between them and uses that to calculate and display the tf-idfs.
manifesto_counts <- manifestos %>%
unnest_tokens(word, text) %>%
count(author, word, sort = TRUE)
total_words <- manifesto_counts %>%
group_by(author) %>%
summarize(total = sum(n))
manifesto_counts <- left_join(manifesto_counts, total_words)
Joining, by = "author"
manifesto_tf_idf <- manifesto_counts %>%
bind_tf_idf(word, author, n)
manifesto_tf_idf %>%
arrange(-tf_idf)
This code creates little graphs of the highest tf-idfs for each manifesto.
manifesto_tf_idf %>%
arrange(-tf_idf) %>%
mutate(word = factor(word, levels = rev(unique(word)))) %>%
group_by(author) %>%
top_n(5) %>%
ggplot(aes(word, tf_idf, fill = author)) +
geom_col(show.legend = FALSE) +
labs(x = NULL, y = "tf-idf") +
facet_wrap(~author, scales = "free") +
coord_flip() +
theme_minimal() +
scale_fill_viridis_d() +
labs(title = "Most Distinctive Words in Each Manifesto")
Selecting by tf_idf

LS0tDQp0aXRsZTogIk1hbmlmZXN0b3MiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoxLiBUaGlzIGNvZGUgcmVhZHMgdGhlIG1hbmlmZXN0byBkYXRhLiAgDQoNCmBgYHtyfQ0KbWFuaWZlc3RvcyA8LSByZWFkX2V4Y2VsKCJtYW5pZmVzdG9zLnhsc3giKQ0KDQptYW5pZmVzdG9zDQpgYGANCg0KVGhpcyBjb2RlIHVubmVzdHMgdGhlIHdvcmRzIGluIHRoZSBtYW5pZmVzdG9zLCB3aGljaCBzZXBhcmF0ZXMgdGhlIHdvcmRzIGludG8gaW5kaXZpZHVhbCByb3dzLiANCg0KYGBge3J9DQptYW5pZmVzdG9zIDwtIG1hbmlmZXN0b3MgJT4lDQogIHVubmVzdF90b2tlbnMod29yZCwgdGV4dCkNCg0KbWFuaWZlc3Rvcw0KYGBgDQoNCjIuIFRoaXMgY29kZSBmaW5kcyB0aGUgbGV4aWNhbCBkaXZlcnNpdHksIG9yIHVzZSBvZiBkaXN0aW5jdCB3b3JkcywgdGhlIGxleGljYWwgZGVuc2l0eSwgd2hpY2ggZmluZHMgd2hhdCBwcm9wb3J0aW9uIG9mIHdvcmRzIGFyZSBkaXN0aW5jdCwgYW5kIHRoZSBudW1iZXIgb2Ygd29yZHMgaW4gZWFjaCBtYW5pZmVzdG8uIA0KDQpgYGB7cn0NCm1hbmlmZXN0b3MgJT4lIA0KICBncm91cF9ieShhdXRob3IpICU+JSANCiAgc3VtbWFyaXNlKG51bV93b3JkcyA9IG4oKSwNCiAgICAgICAgICAgIGxleF9kaXZlcnNpdHkgPSBuX2Rpc3RpbmN0KHdvcmQpLCANCiAgICAgICAgICAgIGxleF9kZW5zaXR5ID0gbl9kaXN0aW5jdCh3b3JkKS9uKCkpDQpgYGANCg0KMy4gVGhpcyBjb2RlIGZpbmRzIHRoZSBtZWFuIHdvcmQgbGVuZ3RoIGZvciBlYWNoIG1hbmlmZXN0byBhbmQgZGlzcGxheXMgaXQgaW4gYSB0YWJsZS4gDQoNCmBgYHtyfQ0KbWFuaWZlc3RvcyAlPiUNCiAgZ3JvdXBfYnkoYXV0aG9yKSAlPiUgDQogIG11dGF0ZSh3b3JkX2xlbmd0aCA9IG5jaGFyKHdvcmQpKSAlPiUgDQogIHN1bW1hcml6ZShtZWFuX3dvcmRfbGVuZ3RoID0gbWVhbih3b3JkX2xlbmd0aCkpICU+JSANCiAgYXJyYW5nZSgtbWVhbl93b3JkX2xlbmd0aCkgDQpgYGANCg0KNC4gVGhpcyBjb2RlIGZpbmRzIHRoZSBsZW5ndGggb2YgdGhlIHdvcmRzIGluIGVhY2ggbWFuaWZlc3RvIGFuZCBjcmVhdGVzIGdyYXBocyBmb3IgZWFjaCBkb2N1bWVudC4NCg0KYGBge3J9DQptYW5pZmVzdG9zICU+JQ0KICBtdXRhdGUod29yZF9sZW5ndGggPSBuY2hhcih3b3JkKSkgJT4lIA0KICBnZ3Bsb3QoYWVzKHdvcmRfbGVuZ3RoKSkgKw0KICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEpICsNCiAgZmFjZXRfd3JhcCh2YXJzKGF1dGhvciksIHNjYWxlcyA9ICJmcmVlX3kiKSArDQogIHRoZW1lX21pbmltYWwoKSArDQogIGxhYnMoeCA9ICJXb3JkIExlbmd0aHMiLCB5ID0gIldvcmRzIiwgdGl0bGUgPSAiTWFuaWZlc3RvIFdvcmQgTGVuZ3RocyBieSBBdXRob3IiKSANCmBgYA0KDQo1LiBUaGlzIGNvZGUgcmVtb3ZlcyB0aGUgc3RvcCB3b3JkcywgbGlrZSAidGhlIiwgImFuZCIsICJ5b3UiLCBmcm9tIGVhY2ggbWFuaWZlc3RvIHNvIG9ubHkgdGhlIGRpc3RpbmN0IHdvcmRzIGFyZSBjb3VudGVkLiANCg0KYGBge3J9DQptYW5pZmVzdG9zICU+JQ0KICBhbnRpX2pvaW4oc3RvcF93b3JkcykNCmBgYA0KDQpUaGlzIGNvZGUgZmluZHMgdGhlIHRvcCBmaXZlIG1vc3QgY29tbW9uIHdvcmRzIGluIGVhY2ggbWFuaWZlc3RvLCB3aXRob3V0IHRoZSBzdG9wIHdvcmRzLCBhbmQgY3JlYXRlcyBsaXR0bGUgZ3JhcGhzIHRvIGRpc3BseSB0aGVtLiANCg0KYGBge3J9DQptYW5pZmVzdG9zICU+JQ0KICBhbnRpX2pvaW4oc3RvcF93b3JkcykgJT4lIA0KICBncm91cF9ieShhdXRob3IpICU+JSANCiAgY291bnQod29yZCwgc29ydCA9IFQpICU+JQ0KICB0b3Bfbig1KSAlPiUgDQogIHVuZ3JvdXAoKSAlPiUgDQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lDQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IGF1dGhvcikpICsNCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKw0KICBsYWJzKHggPSBOVUxMLCB5ID0gIk1vc3QgY29tbW9uIHdvcmRzIikgKw0KICBmYWNldF93cmFwKHZhcnMoYXV0aG9yKSwgc2NhbGVzID0gImZyZWUiKSArDQogIHNjYWxlX2ZpbGxfdmlyaWRpc19kKCkgKw0KICBsYWJzKHRpdGxlID0gIk1vc3QgQ29tbW9uIFdvcmRzIGluIEVhY2ggTWFuaWZlc3RvIikgKw0KICB0aGVtZV9taW5pbWFsKCkgKw0KICBjb29yZF9mbGlwKCkNCmBgYA0KDQo2LiBUaGUgY29kZSBiZWxvdyBjb3VudHMgYW5kIGNvbWJpbmVzIHRoZSB3b3JkcyBpbiBlYWNoIG1hbmlmZXN0byBhbmQgdGhlIGNvbW1vbiB3b3JkcyBiZXR3ZWVuIHRoZW0gYW5kIHVzZXMgdGhhdCB0byBjYWxjdWxhdGUgYW5kIGRpc3BsYXkgdGhlIHRmLWlkZnMuICANCg0KYGBge3J9DQptYW5pZmVzdG9fY291bnRzIDwtIG1hbmlmZXN0b3MgJT4lDQogIHVubmVzdF90b2tlbnMod29yZCwgdGV4dCkgJT4lDQogIGNvdW50KGF1dGhvciwgd29yZCwgc29ydCA9IFRSVUUpDQoNCnRvdGFsX3dvcmRzIDwtIG1hbmlmZXN0b19jb3VudHMgJT4lDQogIGdyb3VwX2J5KGF1dGhvcikgJT4lDQogIHN1bW1hcml6ZSh0b3RhbCA9IHN1bShuKSkNCg0KbWFuaWZlc3RvX2NvdW50cyA8LSBsZWZ0X2pvaW4obWFuaWZlc3RvX2NvdW50cywgdG90YWxfd29yZHMpDQoNCm1hbmlmZXN0b190Zl9pZGYgPC0gbWFuaWZlc3RvX2NvdW50cyAlPiUNCiAgYmluZF90Zl9pZGYod29yZCwgYXV0aG9yLCBuKQ0KDQptYW5pZmVzdG9fdGZfaWRmICU+JQ0KICBhcnJhbmdlKC10Zl9pZGYpDQpgYGANCg0KVGhpcyBjb2RlIGNyZWF0ZXMgbGl0dGxlIGdyYXBocyBvZiB0aGUgaGlnaGVzdCB0Zi1pZGZzIGZvciBlYWNoIG1hbmlmZXN0by4gDQoNCmBgYHtyfQ0KbWFuaWZlc3RvX3RmX2lkZiAlPiUNCiAgYXJyYW5nZSgtdGZfaWRmKSAlPiUNCiAgbXV0YXRlKHdvcmQgPSBmYWN0b3Iod29yZCwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkKSkpKSAlPiUgDQogIGdyb3VwX2J5KGF1dGhvcikgJT4lIA0KICB0b3Bfbig1KSAlPiUgDQogIGdncGxvdChhZXMod29yZCwgdGZfaWRmLCBmaWxsID0gYXV0aG9yKSkgKw0KICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArDQogIGxhYnMoeCA9IE5VTEwsIHkgPSAidGYtaWRmIikgKw0KICBmYWNldF93cmFwKH5hdXRob3IsIHNjYWxlcyA9ICJmcmVlIikgKw0KICBjb29yZF9mbGlwKCkgKw0KICB0aGVtZV9taW5pbWFsKCkgKw0KICBzY2FsZV9maWxsX3ZpcmlkaXNfZCgpICsNCiAgbGFicyh0aXRsZSA9ICJNb3N0IERpc3RpbmN0aXZlIFdvcmRzIGluIEVhY2ggTWFuaWZlc3RvIikNCmBgYA0KDQo=