Load packages
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## Warning: package 'dplyr' was built under R version 3.5.1
## ── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidytext)
library(topicmodels)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
The data
# Load the State of the Union corpus. The column types are declared
# explicitly (they match what readr would otherwise guess for this file) so
# that the schema cannot silently drift if the CSV changes, and so that no
# "Parsed with column specification" message is emitted.
df <- read_csv(
  "state_of_the_union.csv",
  col_types = cols(
    text = col_character(),
    document = col_integer(),
    president = col_character(),
    date = col_date(format = ""),
    party = col_character()
  )
)
## Parsed with column specification:
## cols(
## text = col_character(),
## document = col_integer(),
## president = col_character(),
## date = col_date(format = ""),
## party = col_character()
## )
# List the column names of the raw corpus.
colnames(df)
## [1] "text" "document" "president" "date" "party"
# Preview the raw corpus: each row holds one line of speech text plus the
# document id, president, date and party it belongs to.
print(df)
## # A tibble: 171,177 x 5
## text document president date party
## <chr> <int> <chr> <date> <chr>
## 1 Fellow-Citizens of the Senate a… 1 George Wash… 1790-01-08 none
## 2 <NA> 1 George Wash… 1790-01-08 none
## 3 I embrace with great satisfacti… 1 George Wash… 1790-01-08 none
## 4 itself of congratulating you on… 1 George Wash… 1790-01-08 none
## 5 public affairs. The recent acce… 1 George Wash… 1790-01-08 none
## 6 Carolina to the Constitution of… 1 George Wash… 1790-01-08 none
## 7 information has been received),… 1 George Wash… 1790-01-08 none
## 8 our country, the general and in… 1 George Wash… 1790-01-08 none
## 9 of the Union, and the concord, … 1 George Wash… 1790-01-08 none
## 10 blessed are circumstances auspi… 1 George Wash… 1790-01-08 none
## # ... with 171,167 more rows
Preparing the data
# Corpus-specific filler words removed in addition to the standard stop words.
exclude <- c("united", "states", "must", "may", "can")

# Tokenise the speeches and prune the vocabulary. `df` is overwritten in
# place (%<>%) with one row per remaining word token.
df %<>%
  # One lower-cased word per row; the `text` column is replaced by `word`.
  unnest_tokens(word, text) %>%
  # Explicit join key: avoids the "Joining, by = ..." message and guards
  # against accidentally joining on any other shared column name.
  anti_join(get_stopwords(), by = "word") %>%
  # Drop missing tokens and every token that contains a digit
  # (e.g. "1", "1,000", "1,500"). The previous pattern "[0-9]+ | NA" had
  # literal spaces around the alternation, and single-word tokens never
  # contain spaces, so it matched nothing and numbers leaked into the
  # document-term matrix.
  filter(!is.na(word), !str_detect(word, "[0-9]")) %>%
  filter(!word %in% exclude) %>%
  # Keep only words that occur more than 10 times across the whole corpus.
  add_count(word) %>%
  filter(n > 10) %>%
  select(-n)
## Joining, by = "word"
# Preview the tokenised corpus (one row per retained word).
print(df)
## # A tibble: 753,204 x 5
## document president date party word
## <int> <chr> <date> <chr> <chr>
## 1 1 George Washington 1790-01-08 none fellow
## 2 1 George Washington 1790-01-08 none citizens
## 3 1 George Washington 1790-01-08 none senate
## 4 1 George Washington 1790-01-08 none house
## 5 1 George Washington 1790-01-08 none representatives
## 6 1 George Washington 1790-01-08 none embrace
## 7 1 George Washington 1790-01-08 none great
## 8 1 George Washington 1790-01-08 none satisfaction
## 9 1 George Washington 1790-01-08 none opportunity
## 10 1 George Washington 1790-01-08 none now
## # ... with 753,194 more rows
Cast to document term matrix
# Build a document-term matrix in which each president is one "document":
# tally how often every president uses every word, then cast those tallies
# into a DocumentTermMatrix of raw term frequencies.
df_dtm_pres <- cast_dtm(
  count(df, president, word),
  document = president,
  term = word,
  value = n
)
## Warning: Trying to compute distinct() for variables not found in the data:
## - `row_col`, `column_col`
## This is an error, but only a warning is raised for compatibility reasons.
## The operation will return the input unchanged.
# Summary of the matrix: dimensions, sparsity and weighting scheme.
print(df_dtm_pres)
## <<DocumentTermMatrix (documents: 40, terms: 7316)>>
## Non-/sparse entries: 129820/162820
## Sparsity : 56%
## Maximal term length: 17
## Weighting : term frequency (tf)
# Inspect the internal triplet representation (i, j, v) and the dimnames
# (Docs = presidents, Terms = words) of the document-term matrix.
df_dtm_pres %>% str()
## List of 6
## $ i : int [1:129820] 1 3 4 5 6 7 8 9 10 13 ...
## $ j : int [1:129820] 1 1 1 1 1 1 1 1 1 1 ...
## $ v : num [1:129820] 4 9 32 3 10 19 7 2 3 22 ...
## $ nrow : int 40
## $ ncol : int 7316
## $ dimnames:List of 2
## ..$ Docs : chr [1:40] "Abraham Lincoln" "Andrew Jackson" "Andrew Johnson" "Benjamin Harrison" ...
## ..$ Terms: chr [1:7316] "1" "1,000" "1,000,000" "1,500" ...
## - attr(*, "class")= chr [1:2] "DocumentTermMatrix" "simple_triplet_matrix"
## - attr(*, "weighting")= chr [1:2] "term frequency" "tf"
Fit LDA model (Latent Dirichlet Allocation)
# Fit a 30-topic LDA model with variational EM. The fit starts from a random
# initialisation, so a fixed seed is supplied to make topic numbering and the
# posterior probabilities below reproducible from run to run.
lda_model <- LDA(
  df_dtm_pres,
  k = 30,
  method = "VEM",
  control = list(seed = 1234)
)
lda_model
## A LDA_VEM topic model with 30 topics.
Get posterior probabilities of topics for each president
# Per-president topic distribution: a matrix with one row per president and
# one column per topic, taken from the "topics" element of the posterior.
post_probs <- topicmodels::posterior(lda_model, newdata = df_dtm_pres)$topics
print(post_probs)
## 1 2 3 4
## Abraham Lincoln 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07
## Andrew Jackson 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07
## Andrew Johnson 1.718502e-01 4.189953e-07 4.189953e-07 4.189953e-07
## Benjamin Harrison 1.110911e-03 2.925661e-07 2.925661e-07 2.925661e-07
## Calvin Coolidge 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07
## Chester A. Arthur 9.816109e-01 7.982384e-07 7.982384e-07 7.982384e-07
## Dwight D. Eisenhower 2.566825e-07 9.990404e-01 2.566825e-07 9.524297e-04
## Franklin D. Roosevelt 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## Franklin Pierce 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07
## George H.W. Bush 1.254076e-06 1.254076e-06 1.872809e-02 1.203260e-01
## George W. Bush 4.470402e-07 4.470402e-07 9.999870e-01 4.470402e-07
## George Washington 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## Gerald R. Ford 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## Grover Cleveland 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07
## Harry S. Truman 2.168366e-07 2.168366e-07 2.168366e-07 1.133911e-01
## Herbert Hoover 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07
## James Buchanan 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07
## James Madison 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07
## James Monroe 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07
## James Polk 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## Jimmy Carter 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## John Adams 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06
## John F. Kennedy 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07
## John Quincy Adams 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## John Tyler 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07
## Lyndon B. Johnson 5.245761e-07 5.245761e-07 5.245761e-07 4.139290e-02
## Martin van Buren 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07
## Millard Fillmore 5.184640e-07 5.184640e-07 5.184640e-07 5.184640e-07
## Richard Nixon 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## Ronald Reagan 4.600685e-07 4.600685e-07 4.600685e-07 4.694378e-01
## Rutherford B. Hayes 8.163643e-01 4.539668e-07 4.539668e-07 4.539668e-07
## Theodore Roosevelt 1.013197e-07 1.013197e-07 1.013197e-07 1.013197e-07
## Thomas Jefferson 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## Ulysses S. Grant 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07
## Warren Harding 1.369899e-06 1.369899e-06 1.369899e-06 4.825653e-02
## William H. Taft 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## William J. Clinton 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## William McKinley 2.459831e-04 2.309387e-07 2.309387e-07 2.309387e-07
## Woodrow Wilson 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07
## Zachary Taylor 2.139826e-06 2.139826e-06 2.139826e-06 2.139826e-06
## 5 6 7 8
## Abraham Lincoln 5.827086e-07 5.827086e-07 8.890786e-03 5.827086e-07
## Andrew Jackson 9.996561e-01 1.943286e-07 1.943286e-07 1.943286e-07
## Andrew Johnson 4.189953e-07 4.189953e-07 8.281381e-01 4.189953e-07
## Benjamin Harrison 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07
## Calvin Coolidge 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07
## Chester A. Arthur 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## Dwight D. Eisenhower 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## Franklin D. Roosevelt 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## Franklin Pierce 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07
## George H.W. Bush 1.254076e-06 1.254076e-06 1.254076e-06 3.454309e-01
## George W. Bush 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## George Washington 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## Gerald R. Ford 1.046976e-06 1.046976e-06 1.046976e-06 9.999696e-01
## Grover Cleveland 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07
## Harry S. Truman 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07
## Herbert Hoover 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07
## James Buchanan 2.842836e-07 2.842836e-07 6.266818e-01 2.842836e-07
## James Madison 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07
## James Monroe 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07
## James Polk 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## Jimmy Carter 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## John Adams 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06
## John F. Kennedy 8.573549e-07 8.573549e-07 8.573549e-07 9.200682e-01
## John Quincy Adams 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## John Tyler 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07
## Lyndon B. Johnson 5.245761e-07 5.245761e-07 5.245761e-07 5.245761e-07
## Martin van Buren 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07
## Millard Fillmore 1.226534e-02 5.184640e-07 5.184640e-07 5.184640e-07
## Richard Nixon 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## Ronald Reagan 4.600685e-07 4.600685e-07 4.600685e-07 4.600685e-07
## Rutherford B. Hayes 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07
## Theodore Roosevelt 1.013197e-07 1.013197e-07 1.013197e-07 1.013197e-07
## Thomas Jefferson 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## Ulysses S. Grant 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07
## Warren Harding 1.369899e-06 1.369899e-06 1.369899e-06 1.369899e-06
## William H. Taft 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## William J. Clinton 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## William McKinley 2.309387e-07 9.838158e-01 2.309387e-07 2.309387e-07
## Woodrow Wilson 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07
## Zachary Taylor 2.139826e-06 2.139826e-06 2.139826e-06 2.139826e-06
## 9 10 11 12
## Abraham Lincoln 5.827086e-07 5.827086e-07 5.827086e-07 9.910929e-01
## Andrew Jackson 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07
## Andrew Johnson 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07
## Benjamin Harrison 3.338179e-02 2.925661e-07 2.925661e-07 2.925661e-07
## Calvin Coolidge 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07
## Chester A. Arthur 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## Dwight D. Eisenhower 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## Franklin D. Roosevelt 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## Franklin Pierce 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07
## George H.W. Bush 1.254076e-06 1.254076e-06 1.254076e-06 1.254076e-06
## George W. Bush 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## George Washington 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## Gerald R. Ford 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## Grover Cleveland 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07
## Harry S. Truman 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07
## Herbert Hoover 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07
## James Buchanan 2.842836e-07 2.842836e-07 1.103214e-03 2.842836e-07
## James Madison 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07
## James Monroe 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07
## James Polk 2.256683e-07 2.256683e-07 6.871621e-01 2.256683e-07
## Jimmy Carter 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## John Adams 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06
## John F. Kennedy 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07
## John Quincy Adams 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## John Tyler 4.878746e-07 4.878746e-07 5.762465e-01 4.878746e-07
## Lyndon B. Johnson 5.245761e-07 5.245761e-07 5.245761e-07 5.245761e-07
## Martin van Buren 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07
## Millard Fillmore 5.184640e-07 5.184640e-07 1.765572e-01 7.457907e-01
## Richard Nixon 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## Ronald Reagan 4.600685e-07 4.600685e-07 4.600685e-07 4.600685e-07
## Rutherford B. Hayes 4.539668e-07 4.539668e-07 4.539668e-07 1.899570e-02
## Theodore Roosevelt 1.013197e-07 4.140588e-01 1.013197e-07 1.013197e-07
## Thomas Jefferson 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## Ulysses S. Grant 9.967444e-01 2.463617e-07 2.463617e-07 3.248706e-03
## Warren Harding 1.369899e-06 1.369899e-06 1.369899e-06 1.369899e-06
## William H. Taft 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## William J. Clinton 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## William McKinley 2.309387e-07 2.309387e-07 2.309387e-07 2.309387e-07
## Woodrow Wilson 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07
## Zachary Taylor 2.139826e-06 2.139826e-06 1.254361e-01 3.427219e-01
## 13 14 15 16
## Abraham Lincoln 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07
## Andrew Jackson 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07
## Andrew Johnson 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07
## Benjamin Harrison 2.925661e-07 9.654994e-01 2.925661e-07 2.925661e-07
## Calvin Coolidge 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07
## Chester A. Arthur 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## Dwight D. Eisenhower 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## Franklin D. Roosevelt 3.298549e-07 3.298549e-07 9.999904e-01 3.298549e-07
## Franklin Pierce 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07
## George H.W. Bush 4.960595e-01 1.254076e-06 1.254076e-06 1.942409e-02
## George W. Bush 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## George Washington 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## Gerald R. Ford 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## Grover Cleveland 3.195077e-07 5.287276e-01 3.195077e-07 3.195077e-07
## Harry S. Truman 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07
## Herbert Hoover 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07
## James Buchanan 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07
## James Madison 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07
## James Monroe 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07
## James Polk 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## Jimmy Carter 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## John Adams 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06
## John F. Kennedy 8.573549e-07 8.573549e-07 8.573549e-07 7.990779e-02
## John Quincy Adams 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## John Tyler 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07
## Lyndon B. Johnson 5.245761e-07 5.245761e-07 5.245761e-07 5.319860e-01
## Martin van Buren 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07
## Millard Fillmore 5.184640e-07 5.184640e-07 5.184640e-07 5.184640e-07
## Richard Nixon 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## Ronald Reagan 3.812971e-01 4.600685e-07 4.600685e-07 4.600685e-07
## Rutherford B. Hayes 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07
## Theodore Roosevelt 1.013197e-07 1.013197e-07 1.013197e-07 1.013197e-07
## Thomas Jefferson 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## Ulysses S. Grant 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07
## Warren Harding 1.369899e-06 1.369899e-06 1.369899e-06 1.369899e-06
## William H. Taft 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## William J. Clinton 7.765478e-01 2.490766e-07 2.490766e-07 2.234452e-01
## William McKinley 2.309387e-07 2.309387e-07 2.309387e-07 2.309387e-07
## Woodrow Wilson 4.753025e-07 4.753025e-07 4.753026e-07 4.753025e-07
## Zachary Taylor 2.139826e-06 2.139826e-06 2.139826e-06 2.139826e-06
## 17 18 19 20
## Abraham Lincoln 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07
## Andrew Jackson 1.943286e-07 1.943286e-07 3.384159e-04 1.943286e-07
## Andrew Johnson 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07
## Benjamin Harrison 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07
## Calvin Coolidge 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07
## Chester A. Arthur 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## Dwight D. Eisenhower 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## Franklin D. Roosevelt 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## Franklin Pierce 3.864294e-07 3.864294e-07 3.017448e-01 3.864294e-07
## George H.W. Bush 1.254076e-06 1.254076e-06 1.254076e-06 1.254076e-06
## George W. Bush 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## George Washington 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## Gerald R. Ford 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## Grover Cleveland 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07
## Harry S. Truman 2.168366e-07 2.168366e-07 2.168366e-07 8.866028e-01
## Herbert Hoover 6.670586e-07 6.670586e-07 6.670586e-07 1.209756e-03
## James Buchanan 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07
## James Madison 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07
## James Monroe 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07
## James Polk 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## Jimmy Carter 9.999911e-01 3.064299e-07 3.064299e-07 3.064299e-07
## John Adams 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06
## John F. Kennedy 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07
## John Quincy Adams 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## John Tyler 4.878746e-07 4.878746e-07 1.306501e-03 4.878746e-07
## Lyndon B. Johnson 5.245761e-07 5.245761e-07 5.245761e-07 1.001316e-02
## Martin van Buren 3.573370e-07 3.573370e-07 9.964703e-01 3.573370e-07
## Millard Fillmore 5.184640e-07 5.184640e-07 5.184640e-07 5.184640e-07
## Richard Nixon 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## Ronald Reagan 4.600685e-07 4.600685e-07 4.600685e-07 4.600685e-07
## Rutherford B. Hayes 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07
## Theodore Roosevelt 1.013197e-07 9.842067e-02 1.013197e-07 1.013197e-07
## Thomas Jefferson 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## Ulysses S. Grant 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07
## Warren Harding 1.369899e-06 8.778205e-01 1.369899e-06 1.369899e-06
## William H. Taft 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## William J. Clinton 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## William McKinley 2.309387e-07 2.309387e-07 2.309387e-07 2.309387e-07
## Woodrow Wilson 4.753025e-07 4.753026e-07 4.753025e-07 4.753025e-07
## Zachary Taylor 2.139826e-06 2.139826e-06 2.139826e-06 2.139826e-06
## 21 22 23 24
## Abraham Lincoln 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07
## Andrew Jackson 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07
## Andrew Johnson 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07
## Benjamin Harrison 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07
## Calvin Coolidge 2.969591e-07 2.969591e-07 2.969591e-07 9.390188e-01
## Chester A. Arthur 1.836680e-02 7.982384e-07 7.982384e-07 7.982384e-07
## Dwight D. Eisenhower 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## Franklin D. Roosevelt 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## Franklin Pierce 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07
## George H.W. Bush 1.254076e-06 1.254076e-06 1.254076e-06 1.254076e-06
## George W. Bush 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## George Washington 1.021294e-06 1.021294e-06 9.999704e-01 1.021294e-06
## Gerald R. Ford 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## Grover Cleveland 4.712634e-01 3.195077e-07 3.195077e-07 3.195077e-07
## Harry S. Truman 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07
## Herbert Hoover 6.670586e-07 6.670586e-07 6.670586e-07 9.987716e-01
## James Buchanan 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07
## James Madison 7.709251e-07 7.709251e-07 6.724711e-01 7.709251e-07
## James Monroe 3.975195e-07 3.975195e-07 2.154976e-01 3.975195e-07
## James Polk 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## Jimmy Carter 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## John Adams 2.312168e-06 2.312168e-06 9.552926e-01 2.312168e-06
## John F. Kennedy 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07
## John Quincy Adams 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## John Tyler 4.224338e-01 4.878746e-07 4.878746e-07 4.878746e-07
## Lyndon B. Johnson 5.245761e-07 4.165943e-01 5.245761e-07 5.245761e-07
## Martin van Buren 3.519706e-03 3.573370e-07 3.573370e-07 3.573370e-07
## Millard Fillmore 5.184640e-07 5.184640e-07 8.603266e-03 5.184640e-07
## Richard Nixon 5.279604e-07 9.999847e-01 5.279604e-07 5.279604e-07
## Ronald Reagan 4.600685e-07 1.492527e-01 4.600685e-07 4.600685e-07
## Rutherford B. Hayes 1.646277e-01 4.539668e-07 4.539668e-07 4.539668e-07
## Theodore Roosevelt 1.013197e-07 1.013197e-07 1.013197e-07 1.013197e-07
## Thomas Jefferson 7.893879e-07 7.893879e-07 9.999771e-01 7.893879e-07
## Ulysses S. Grant 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07
## Warren Harding 1.369899e-06 1.369899e-06 1.369899e-06 7.388593e-02
## William H. Taft 3.660018e-02 2.216635e-07 2.216635e-07 2.216635e-07
## William J. Clinton 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## William McKinley 1.989055e-03 2.309387e-07 2.309387e-07 2.309387e-07
## Woodrow Wilson 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07
## Zachary Taylor 2.139826e-06 2.139826e-06 6.147556e-02 2.139826e-06
## 25 26 27 28
## Abraham Lincoln 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07
## Andrew Jackson 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07
## Andrew Johnson 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07
## Benjamin Harrison 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07
## Calvin Coolidge 6.055442e-02 2.969591e-07 2.969591e-07 2.969591e-07
## Chester A. Arthur 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## Dwight D. Eisenhower 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## Franklin D. Roosevelt 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## Franklin Pierce 3.864294e-07 3.864294e-07 6.982443e-01 3.864294e-07
## George H.W. Bush 1.254076e-06 1.254076e-06 1.254076e-06 1.254076e-06
## George W. Bush 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## George Washington 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## Gerald R. Ford 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## Grover Cleveland 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07
## Harry S. Truman 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07
## Herbert Hoover 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07
## James Buchanan 2.842836e-07 2.842836e-07 3.722073e-01 2.842836e-07
## James Madison 7.709251e-07 7.709251e-07 7.709251e-07 3.275073e-01
## James Monroe 3.975195e-07 7.844912e-01 3.975195e-07 3.975195e-07
## James Polk 2.256683e-07 3.128316e-01 2.256683e-07 2.256683e-07
## Jimmy Carter 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## John Adams 2.312168e-06 4.464267e-02 2.312168e-06 2.312168e-06
## John F. Kennedy 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07
## John Quincy Adams 5.402598e-07 5.402598e-07 5.402598e-07 9.999843e-01
## John Tyler 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07
## Lyndon B. Johnson 5.245761e-07 5.245761e-07 5.245761e-07 5.245761e-07
## Martin van Buren 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07
## Millard Fillmore 5.184640e-07 5.184640e-07 5.677053e-02 5.184640e-07
## Richard Nixon 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## Ronald Reagan 4.600685e-07 4.600685e-07 4.600685e-07 4.600685e-07
## Rutherford B. Hayes 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07
## Theodore Roosevelt 4.870072e-01 1.013197e-07 1.013197e-07 1.013197e-07
## Thomas Jefferson 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## Ulysses S. Grant 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07
## Warren Harding 1.369899e-06 1.369899e-06 1.369899e-06 1.369899e-06
## William H. Taft 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## William J. Clinton 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## William McKinley 2.309387e-07 2.309387e-07 1.394314e-02 2.309387e-07
## Woodrow Wilson 4.753026e-07 4.753025e-07 4.753025e-07 4.753025e-07
## Zachary Taylor 2.139826e-06 2.139826e-06 4.703108e-01 2.139826e-06
## 29 30
## Abraham Lincoln 5.827086e-07 5.827086e-07
## Andrew Jackson 1.943286e-07 1.943286e-07
## Andrew Johnson 4.189953e-07 4.189953e-07
## Benjamin Harrison 2.925661e-07 2.925661e-07
## Calvin Coolidge 4.187530e-04 2.969591e-07
## Chester A. Arthur 7.982384e-07 7.982384e-07
## Dwight D. Eisenhower 2.566825e-07 2.566825e-07
## Franklin D. Roosevelt 3.298549e-07 3.298549e-07
## Franklin Pierce 3.864294e-07 3.864294e-07
## George H.W. Bush 1.254076e-06 1.254076e-06
## George W. Bush 4.470402e-07 4.470402e-07
## George Washington 1.021294e-06 1.021294e-06
## Gerald R. Ford 1.046976e-06 1.046976e-06
## Grover Cleveland 3.195077e-07 3.195077e-07
## Harry S. Truman 2.168366e-07 2.168366e-07
## Herbert Hoover 6.670586e-07 6.670586e-07
## James Buchanan 2.842836e-07 2.842836e-07
## James Madison 7.709251e-07 7.709251e-07
## James Monroe 3.975195e-07 3.975195e-07
## James Polk 2.256683e-07 2.256683e-07
## Jimmy Carter 3.064299e-07 3.064299e-07
## John Adams 2.312168e-06 2.312168e-06
## John F. Kennedy 8.573549e-07 8.573549e-07
## John Quincy Adams 5.402598e-07 5.402598e-07
## John Tyler 4.878746e-07 4.878746e-07
## Lyndon B. Johnson 5.245761e-07 5.245761e-07
## Martin van Buren 3.573370e-07 3.573370e-07
## Millard Fillmore 5.184640e-07 5.184640e-07
## Richard Nixon 5.279604e-07 5.279604e-07
## Ronald Reagan 4.600685e-07 4.600685e-07
## Rutherford B. Hayes 4.539668e-07 4.539668e-07
## Theodore Roosevelt 2.818149e-04 2.289705e-04
## Thomas Jefferson 7.893879e-07 7.893879e-07
## Ulysses S. Grant 2.463617e-07 2.463617e-07
## Warren Harding 1.369899e-06 1.369899e-06
## William H. Taft 9.633936e-01 2.216635e-07
## William J. Clinton 2.490766e-07 2.490766e-07
## William McKinley 2.309387e-07 2.309387e-07
## Woodrow Wilson 4.753025e-07 9.999862e-01
## Zachary Taylor 2.139826e-06 2.139826e-06
Basic heatmap
# Quick base-graphics heatmap of the president-by-topic probability matrix.
heatmap(x = post_probs)

Converting to a data frame
# Convert the posterior matrix to a data frame (topic columns X1..X30) and
# carry the president labels over from the matrix row names.
#
# The labels must come from rownames(post_probs): the document-term matrix
# was built from count(), so its rows are in alphabetical order. The previous
# extra step overwrote them with unique(df$president), which is in order of
# first appearance in the corpus, and therefore attached every row of topic
# probabilities to the wrong president.
heat_df <- post_probs %>%
  data.frame() %>%
  mutate(president = rownames(post_probs))
heat_df
## X1 X2 X3 X4 X5
## 1 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07
## 2 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07 9.996561e-01
## 3 1.718502e-01 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07
## 4 1.110911e-03 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07
## 5 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07
## 6 9.816109e-01 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## 7 2.566825e-07 9.990404e-01 2.566825e-07 9.524297e-04 2.566825e-07
## 8 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## 9 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07
## 10 1.254076e-06 1.254076e-06 1.872809e-02 1.203260e-01 1.254076e-06
## 11 4.470402e-07 4.470402e-07 9.999870e-01 4.470402e-07 4.470402e-07
## 12 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## 13 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## 14 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07
## 15 2.168366e-07 2.168366e-07 2.168366e-07 1.133911e-01 2.168366e-07
## 16 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07
## 17 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07
## 18 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07
## 19 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07
## 20 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## 21 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## 22 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06
## 23 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07
## 24 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## 25 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07
## 26 5.245761e-07 5.245761e-07 5.245761e-07 4.139290e-02 5.245761e-07
## 27 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07
## 28 5.184640e-07 5.184640e-07 5.184640e-07 5.184640e-07 1.226534e-02
## 29 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## 30 4.600685e-07 4.600685e-07 4.600685e-07 4.694378e-01 4.600685e-07
## 31 8.163643e-01 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07
## 32 1.013197e-07 1.013197e-07 1.013197e-07 1.013197e-07 1.013197e-07
## 33 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## 34 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07
## 35 1.369899e-06 1.369899e-06 1.369899e-06 4.825653e-02 1.369899e-06
## 36 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## 37 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## 38 2.459831e-04 2.309387e-07 2.309387e-07 2.309387e-07 2.309387e-07
## 39 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07
## 40 2.139826e-06 2.139826e-06 2.139826e-06 2.139826e-06 2.139826e-06
## X6 X7 X8 X9 X10
## 1 5.827086e-07 8.890786e-03 5.827086e-07 5.827086e-07 5.827086e-07
## 2 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07
## 3 4.189953e-07 8.281381e-01 4.189953e-07 4.189953e-07 4.189953e-07
## 4 2.925661e-07 2.925661e-07 2.925661e-07 3.338179e-02 2.925661e-07
## 5 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07
## 6 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## 7 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## 8 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## 9 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07
## 10 1.254076e-06 1.254076e-06 3.454309e-01 1.254076e-06 1.254076e-06
## 11 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## 12 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## 13 1.046976e-06 1.046976e-06 9.999696e-01 1.046976e-06 1.046976e-06
## 14 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07
## 15 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07
## 16 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07
## 17 2.842836e-07 6.266818e-01 2.842836e-07 2.842836e-07 2.842836e-07
## 18 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07
## 19 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07
## 20 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## 21 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## 22 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06
## 23 8.573549e-07 8.573549e-07 9.200682e-01 8.573549e-07 8.573549e-07
## 24 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## 25 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07
## 26 5.245761e-07 5.245761e-07 5.245761e-07 5.245761e-07 5.245761e-07
## 27 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07
## 28 5.184640e-07 5.184640e-07 5.184640e-07 5.184640e-07 5.184640e-07
## 29 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## 30 4.600685e-07 4.600685e-07 4.600685e-07 4.600685e-07 4.600685e-07
## 31 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07
## 32 1.013197e-07 1.013197e-07 1.013197e-07 1.013197e-07 4.140588e-01
## 33 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## 34 2.463617e-07 2.463617e-07 2.463617e-07 9.967444e-01 2.463617e-07
## 35 1.369899e-06 1.369899e-06 1.369899e-06 1.369899e-06 1.369899e-06
## 36 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## 37 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## 38 9.838158e-01 2.309387e-07 2.309387e-07 2.309387e-07 2.309387e-07
## 39 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07
## 40 2.139826e-06 2.139826e-06 2.139826e-06 2.139826e-06 2.139826e-06
## X11 X12 X13 X14 X15
## 1 5.827086e-07 9.910929e-01 5.827086e-07 5.827086e-07 5.827086e-07
## 2 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07
## 3 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07
## 4 2.925661e-07 2.925661e-07 2.925661e-07 9.654994e-01 2.925661e-07
## 5 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07
## 6 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## 7 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## 8 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07 9.999904e-01
## 9 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07
## 10 1.254076e-06 1.254076e-06 4.960595e-01 1.254076e-06 1.254076e-06
## 11 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## 12 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## 13 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## 14 3.195077e-07 3.195077e-07 3.195077e-07 5.287276e-01 3.195077e-07
## 15 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07
## 16 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07
## 17 1.103214e-03 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07
## 18 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07
## 19 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07
## 20 6.871621e-01 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## 21 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## 22 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06
## 23 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07
## 24 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## 25 5.762465e-01 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07
## 26 5.245761e-07 5.245761e-07 5.245761e-07 5.245761e-07 5.245761e-07
## 27 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07
## 28 1.765572e-01 7.457907e-01 5.184640e-07 5.184640e-07 5.184640e-07
## 29 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## 30 4.600685e-07 4.600685e-07 3.812971e-01 4.600685e-07 4.600685e-07
## 31 4.539668e-07 1.899570e-02 4.539668e-07 4.539668e-07 4.539668e-07
## 32 1.013197e-07 1.013197e-07 1.013197e-07 1.013197e-07 1.013197e-07
## 33 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## 34 2.463617e-07 3.248706e-03 2.463617e-07 2.463617e-07 2.463617e-07
## 35 1.369899e-06 1.369899e-06 1.369899e-06 1.369899e-06 1.369899e-06
## 36 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## 37 2.490766e-07 2.490766e-07 7.765478e-01 2.490766e-07 2.490766e-07
## 38 2.309387e-07 2.309387e-07 2.309387e-07 2.309387e-07 2.309387e-07
## 39 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07 4.753026e-07
## 40 1.254361e-01 3.427219e-01 2.139826e-06 2.139826e-06 2.139826e-06
## X16 X17 X18 X19 X20
## 1 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07
## 2 1.943286e-07 1.943286e-07 1.943286e-07 3.384159e-04 1.943286e-07
## 3 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07
## 4 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07
## 5 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07 2.969591e-07
## 6 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## 7 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## 8 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## 9 3.864294e-07 3.864294e-07 3.864294e-07 3.017448e-01 3.864294e-07
## 10 1.942409e-02 1.254076e-06 1.254076e-06 1.254076e-06 1.254076e-06
## 11 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## 12 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## 13 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## 14 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07
## 15 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07 8.866028e-01
## 16 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07 1.209756e-03
## 17 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07
## 18 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07 7.709251e-07
## 19 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07
## 20 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## 21 3.064299e-07 9.999911e-01 3.064299e-07 3.064299e-07 3.064299e-07
## 22 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06
## 23 7.990779e-02 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07
## 24 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## 25 4.878746e-07 4.878746e-07 4.878746e-07 1.306501e-03 4.878746e-07
## 26 5.319860e-01 5.245761e-07 5.245761e-07 5.245761e-07 1.001316e-02
## 27 3.573370e-07 3.573370e-07 3.573370e-07 9.964703e-01 3.573370e-07
## 28 5.184640e-07 5.184640e-07 5.184640e-07 5.184640e-07 5.184640e-07
## 29 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## 30 4.600685e-07 4.600685e-07 4.600685e-07 4.600685e-07 4.600685e-07
## 31 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07
## 32 1.013197e-07 1.013197e-07 9.842067e-02 1.013197e-07 1.013197e-07
## 33 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## 34 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07
## 35 1.369899e-06 1.369899e-06 8.778205e-01 1.369899e-06 1.369899e-06
## 36 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## 37 2.234452e-01 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## 38 2.309387e-07 2.309387e-07 2.309387e-07 2.309387e-07 2.309387e-07
## 39 4.753025e-07 4.753025e-07 4.753026e-07 4.753025e-07 4.753025e-07
## 40 2.139826e-06 2.139826e-06 2.139826e-06 2.139826e-06 2.139826e-06
## X21 X22 X23 X24 X25
## 1 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07
## 2 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07
## 3 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07
## 4 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07
## 5 2.969591e-07 2.969591e-07 2.969591e-07 9.390188e-01 6.055442e-02
## 6 1.836680e-02 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## 7 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## 8 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## 9 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07 3.864294e-07
## 10 1.254076e-06 1.254076e-06 1.254076e-06 1.254076e-06 1.254076e-06
## 11 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## 12 1.021294e-06 1.021294e-06 9.999704e-01 1.021294e-06 1.021294e-06
## 13 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## 14 4.712634e-01 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07
## 15 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07
## 16 6.670586e-07 6.670586e-07 6.670586e-07 9.987716e-01 6.670586e-07
## 17 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07 2.842836e-07
## 18 7.709251e-07 7.709251e-07 6.724711e-01 7.709251e-07 7.709251e-07
## 19 3.975195e-07 3.975195e-07 2.154976e-01 3.975195e-07 3.975195e-07
## 20 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## 21 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## 22 2.312168e-06 2.312168e-06 9.552926e-01 2.312168e-06 2.312168e-06
## 23 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07
## 24 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07 5.402598e-07
## 25 4.224338e-01 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07
## 26 5.245761e-07 4.165943e-01 5.245761e-07 5.245761e-07 5.245761e-07
## 27 3.519706e-03 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07
## 28 5.184640e-07 5.184640e-07 8.603266e-03 5.184640e-07 5.184640e-07
## 29 5.279604e-07 9.999847e-01 5.279604e-07 5.279604e-07 5.279604e-07
## 30 4.600685e-07 1.492527e-01 4.600685e-07 4.600685e-07 4.600685e-07
## 31 1.646277e-01 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07
## 32 1.013197e-07 1.013197e-07 1.013197e-07 1.013197e-07 4.870072e-01
## 33 7.893879e-07 7.893879e-07 9.999771e-01 7.893879e-07 7.893879e-07
## 34 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07
## 35 1.369899e-06 1.369899e-06 1.369899e-06 7.388593e-02 1.369899e-06
## 36 3.660018e-02 2.216635e-07 2.216635e-07 2.216635e-07 2.216635e-07
## 37 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## 38 1.989055e-03 2.309387e-07 2.309387e-07 2.309387e-07 2.309387e-07
## 39 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07 4.753026e-07
## 40 2.139826e-06 2.139826e-06 6.147556e-02 2.139826e-06 2.139826e-06
## X26 X27 X28 X29 X30
## 1 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07 5.827086e-07
## 2 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07 1.943286e-07
## 3 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07 4.189953e-07
## 4 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07 2.925661e-07
## 5 2.969591e-07 2.969591e-07 2.969591e-07 4.187530e-04 2.969591e-07
## 6 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07 7.982384e-07
## 7 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07 2.566825e-07
## 8 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07 3.298549e-07
## 9 3.864294e-07 6.982443e-01 3.864294e-07 3.864294e-07 3.864294e-07
## 10 1.254076e-06 1.254076e-06 1.254076e-06 1.254076e-06 1.254076e-06
## 11 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07 4.470402e-07
## 12 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06 1.021294e-06
## 13 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06 1.046976e-06
## 14 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07 3.195077e-07
## 15 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07 2.168366e-07
## 16 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07 6.670586e-07
## 17 2.842836e-07 3.722073e-01 2.842836e-07 2.842836e-07 2.842836e-07
## 18 7.709251e-07 7.709251e-07 3.275073e-01 7.709251e-07 7.709251e-07
## 19 7.844912e-01 3.975195e-07 3.975195e-07 3.975195e-07 3.975195e-07
## 20 3.128316e-01 2.256683e-07 2.256683e-07 2.256683e-07 2.256683e-07
## 21 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07 3.064299e-07
## 22 4.464267e-02 2.312168e-06 2.312168e-06 2.312168e-06 2.312168e-06
## 23 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07 8.573549e-07
## 24 5.402598e-07 5.402598e-07 9.999843e-01 5.402598e-07 5.402598e-07
## 25 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07 4.878746e-07
## 26 5.245761e-07 5.245761e-07 5.245761e-07 5.245761e-07 5.245761e-07
## 27 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07 3.573370e-07
## 28 5.184640e-07 5.677053e-02 5.184640e-07 5.184640e-07 5.184640e-07
## 29 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07 5.279604e-07
## 30 4.600685e-07 4.600685e-07 4.600685e-07 4.600685e-07 4.600685e-07
## 31 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07 4.539668e-07
## 32 1.013197e-07 1.013197e-07 1.013197e-07 2.818149e-04 2.289705e-04
## 33 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07 7.893879e-07
## 34 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07 2.463617e-07
## 35 1.369899e-06 1.369899e-06 1.369899e-06 1.369899e-06 1.369899e-06
## 36 2.216635e-07 2.216635e-07 2.216635e-07 9.633936e-01 2.216635e-07
## 37 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07 2.490766e-07
## 38 2.309387e-07 1.394314e-02 2.309387e-07 2.309387e-07 2.309387e-07
## 39 4.753025e-07 4.753025e-07 4.753025e-07 4.753025e-07 9.999862e-01
## 40 2.139826e-06 4.703108e-01 2.139826e-06 2.139826e-06 2.139826e-06
## president
## 1 George Washington
## 2 John Adams
## 3 Thomas Jefferson
## 4 James Madison
## 5 James Monroe
## 6 John Quincy Adams
## 7 Andrew Jackson
## 8 Martin van Buren
## 9 John Tyler
## 10 James Polk
## 11 Zachary Taylor
## 12 Millard Fillmore
## 13 Franklin Pierce
## 14 James Buchanan
## 15 Abraham Lincoln
## 16 Andrew Johnson
## 17 Ulysses S. Grant
## 18 Rutherford B. Hayes
## 19 Chester A. Arthur
## 20 Grover Cleveland
## 21 Benjamin Harrison
## 22 William McKinley
## 23 Theodore Roosevelt
## 24 William H. Taft
## 25 Woodrow Wilson
## 26 Warren Harding
## 27 Calvin Coolidge
## 28 Herbert Hoover
## 29 Franklin D. Roosevelt
## 30 Harry S. Truman
## 31 Dwight D. Eisenhower
## 32 John F. Kennedy
## 33 Lyndon B. Johnson
## 34 Richard Nixon
## 35 Gerald R. Ford
## 36 Jimmy Carter
## 37 Ronald Reagan
## 38 George H.W. Bush
## 39 William J. Clinton
## 40 George W. Bush
Preparing the data (again)
# Reshape the wide topic table (one X<k> column per topic plus `president`)
# into long form: one row per president/topic pair with its weight in `value`.
heat_df <- heat_df %>%
  gather(key = topic, value = value, -president) %>%
  mutate(
    # Strip the "X" prefix and go through numeric so the factor levels sort
    # as 1, 2, ..., 10 rather than alphabetically.
    topic = as.factor(as.numeric(str_replace(topic, "X", ""))),
    # Factor levels follow the order in which presidents first appear in `df`.
    president = factor(president, levels = unique(df$president))
  ) %>%
  as_tibble()
heat_df
## # A tibble: 1,200 x 3
## president topic value
## <fct> <fct> <dbl>
## 1 George Washington 1 0.000000583
## 2 John Adams 1 0.000000194
## 3 Thomas Jefferson 1 0.172
## 4 James Madison 1 0.00111
## 5 James Monroe 1 0.000000297
## 6 John Quincy Adams 1 0.982
## 7 Andrew Jackson 1 0.000000257
## 8 Martin van Buren 1 0.000000330
## 9 John Tyler 1 0.000000386
## 10 James Polk 1 0.00000125
## # ... with 1,190 more rows
Heatmap with ggplot2!
# Tile heatmap of topic weight per president. `fct_rev()` reverses the
# president factor levels on the y axis.
ggplot(
  heat_df,
  aes(x = topic, y = fct_rev(president), fill = value)
) +
  geom_tile(colour = "snow") +
  scale_fill_gradient(low = "snow", high = "darkred") +
  theme_minimal() +
  labs(
    title = "Topics associated with presidents",
    y = "presidents"
  )

#ggsave("heatmap.png")
Explore topics
# Tidy the fitted LDA model with the default matrix; the `term` and `beta`
# columns are used below.
td_beta <- tidy(lda_model)

# For every topic, build a single comma-separated label from its highest-beta
# terms (ordered from highest to lowest beta).
# NOTE(review): `top_n()` keeps ties, so a topic may get more than six terms.
top_terms <- td_beta %>%
  # Shorten "government" so the labels stay compact.
  mutate(term = recode(term, government = "govt")) %>%
  group_by(topic) %>%
  top_n(6, beta) %>%
  arrange(-beta) %>%
  # Collapse directly to one string per topic; this avoids the list column
  # and the argument-less `unnest()` call, which newer tidyr deprecates.
  summarise(terms = paste(term, collapse = ", "))
# Tidy the LDA model again, this time requesting the "gamma" matrix.
# NOTE(review): `document_names` looks like an argument of the stm tidier;
# confirm that the LDA tidier actually uses it rather than ignoring it.
td_gamma <- tidy(
  lda_model,
  matrix = "gamma",
  document_names = rownames(df_dtm_pres)
)

# Mean gamma per topic, most prevalent first, with each topic's term label
# attached. `topic` becomes a "Topic <k>" factor ordered by mean gamma.
gamma_terms <- td_gamma %>%
  group_by(topic) %>%
  summarise(gamma = mean(gamma)) %>%
  left_join(top_terms, by = "topic") %>%
  arrange(desc(gamma)) %>%
  mutate(topic = reorder(paste0("Topic ", topic), gamma))
# Horizontal bar chart of the 15 topics with the highest mean gamma, each bar
# annotated with that topic's term label.
lda_plot <- ggplot(
  top_n(gamma_terms, 15, gamma),
  aes(x = topic, y = gamma, label = terms, fill = topic)
) +
  geom_col(show.legend = FALSE) +
  # Negative hjust nudges the label just past the end of its bar.
  geom_text(hjust = -0.05, size = 3, family = "Helvetica") +
  scale_fill_viridis_d(begin = 0.3) +
  scale_y_continuous(
    labels = percent_format(),
    limits = c(0, 0.18),
    expand = c(0, 0)
  ) +
  coord_flip() +
  labs(
    title = "LDA: Top 15 topics by prevalence in the SOU",
    subtitle = "With the top words that contribute to each topic",
    x = NULL,
    y = expression(gamma)
  )
lda_plot

# Save the topic-prevalence chart. The plot is passed explicitly so the file
# written does not depend on which plot happened to be displayed last.
# Height is left unset, as in the original call.
ggsave("lda_plot.png", plot = lda_plot, width = 10)
## Saving 10 x 5 in image