Chapter 7

Getting the data and distribution of tweets

I got the data from the GitHub repository for the book Tidy Text Mining (dgrtwo/tidy-text-mining).

First download their twitter archives.

library(lubridate)
library(ggplot2)
library(dplyr)
library(readr)
# Read Julia's tweet archive (CSV) from the tidy-text-mining GitHub repository.
tweets_julia <- read_csv(
  file = "https://raw.githubusercontent.com/dgrtwo/tidy-text-mining/master/data/tweets_julia.csv"
)
Parsed with column specification:
cols(
  tweet_id = col_double(),
  in_reply_to_status_id = col_double(),
  in_reply_to_user_id = col_double(),
  timestamp = col_character(),
  source = col_character(),
  text = col_character(),
  retweeted_status_id = col_double(),
  retweeted_status_user_id = col_double(),
  retweeted_status_timestamp = col_character(),
  expanded_urls = col_character()
)
# Read David's tweet archive (CSV) from the tidy-text-mining GitHub repository.
tweets_dave <- read_csv(
  file = "https://raw.githubusercontent.com/dgrtwo/tidy-text-mining/master/data/tweets_dave.csv"
)
Parsed with column specification:
cols(
  tweet_id = col_double(),
  in_reply_to_status_id = col_double(),
  in_reply_to_user_id = col_double(),
  timestamp = col_character(),
  source = col_character(),
  text = col_character(),
  retweeted_status_id = col_double(),
  retweeted_status_user_id = col_double(),
  retweeted_status_timestamp = col_character(),
  expanded_urls = col_character()
)
# Stack both archives, tagging every row with its author, then parse the
# character timestamps into date-times.
tweets <- bind_rows(
  mutate(tweets_julia, person = "Julia"),
  mutate(tweets_dave, person = "David")
) %>%
  mutate(timestamp = ymd_hms(timestamp))
# Histogram of tweet times (20 bins), one panel per person in a single column.
ggplot(data = tweets, mapping = aes(x = timestamp, fill = person)) +
  geom_histogram(bins = 20, position = "identity", show.legend = FALSE) +
  facet_wrap(facets = ~person, ncol = 1)

Word Frequencies

To get rid of stop words and other unwanted text, the authors use some regular expressions. This is a way to search through the tweets based on different text strings.

library(tidytext)
library(stringr)
# Text to strip before tokenizing: t.co links, other http:// fragments,
# HTML-escaped &, < and >, the literal "RT", and any leftover "https".
replace_reg <- "https://t.co/[A-Za-z\\d]+|http://[A-Za-z\\d]+|&amp;|&lt;|&gt;|RT|https"
# Token separators: any character that is not a letter, digit, underscore,
# #, @ or apostrophe, plus apostrophes not followed by a letter, digit,
# underscore, # or @.
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

# One row per word: drop tweets whose text starts with "RT", clean the text,
# split it on unnest_reg, then remove stop words and tokens with no letters.
tidy_tweets <- tweets %>%
  filter(!str_detect(string = text, pattern = "^RT")) %>%
  mutate(
    text = str_replace_all(text, pattern = replace_reg, replacement = "")
  ) %>%
  unnest_tokens(
    output = word,
    input = text,
    token = "regex",
    pattern = unnest_reg
  ) %>%
  filter(!(word %in% stop_words$word)) %>%
  filter(str_detect(string = word, pattern = "[a-z]"))
# Relative frequency of each word within each person's tweets.
# Counting on (person, word) directly avoids leaving the result grouped by
# person, and the join key is named explicitly instead of being inferred from
# the shared column names.
frequency <- tidy_tweets %>%
  count(person, word, sort = TRUE) %>%
  left_join(
    tidy_tweets %>%
      group_by(person) %>%
      summarise(total = n()),   # total words per person
    by = "person"
  ) %>%
  mutate(freq = n / total)

frequency
library(tidyr)
# Reshape to one row per word with one frequency column per person, then
# sort by Julia's frequency and, within ties, by David's.
frequency <- frequency %>%
  select(person, word, freq) %>%
  spread(key = person, value = freq) %>%
  arrange(Julia, David)

frequency
library(scales)
# Each word placed by its frequency in Julia's tweets (x) and David's tweets
# (y), on log10 axes labelled as percentages. The red reference line uses
# geom_abline()'s default intercept and slope.
ggplot(data = frequency, mapping = aes(x = Julia, y = David)) +
  geom_jitter(size = 2.5, alpha = 0.1, width = 0.25, height = 0.25) +
  geom_text(mapping = aes(label = word), vjust = 1.5, check_overlap = TRUE) +
  scale_x_log10(labels = percent_format()) +
  scale_y_log10(labels = percent_format()) +
  geom_abline(colour = "red")

Comparing word usage

# Keep only tweets from calendar year 2016.
# The bounds are date-times in UTC (the zone ymd_hms() assigns by default), so
# each comparison is date-time against date-time rather than date-time
# against a Date object.
tidy_tweets <- tidy_tweets %>%
  filter(
    timestamp >= as.POSIXct("2016-01-01", tz = "UTC"),
    timestamp < as.POSIXct("2017-01-01", tz = "UTC")
  )

To compare word usage the following is calculated.

\(\text{log odds ratio} = \ln{\left(\frac{\left[\frac{n+1}{\text{total}+1}\right]_\text{David}}{\left[\frac{n+1}{\text{total}+1}\right]_\text{Julia}}\right)}\)

Here \(n\) is the number of times a word is used by a person and \(\text{total}\) is the total number of words for that person.

Most common words.

# Log odds ratio (David / Julia) for every word used at least 10 times in
# total across both accounts.
word_ratios <- tidy_tweets %>%
  filter(!str_detect(word, "^@")) %>%   # skip tokens that start with "@"
  count(word, person) %>%
  # Group explicitly so the threshold applies to each word's combined count
  # rather than to the sum over the whole table.
  group_by(word) %>%
  filter(sum(n) >= 10) %>%
  ungroup() %>%
  spread(person, n, fill = 0) %>%
  # (n + 1) / (total + 1) for each person's column, matching the log odds
  # ratio formula given in the text.
  mutate_if(is.numeric, list(~ (. + 1) / (sum(.) + 1))) %>%
  mutate(logratio = log(David / Julia)) %>%
  arrange(desc(logratio))

# Words whose log ratio is closest to zero come first.
word_ratios %>%
  arrange(abs(logratio))

Which words are most likely to be from Julia’s account or from David’s account?

# For each sign of the log ratio, take the 15 words with the largest absolute
# value and draw them as horizontal bars ordered by log ratio.
word_ratios %>%
  group_by(logratio < 0) %>%
  top_n(n = 15, wt = abs(logratio)) %>%
  ungroup() %>%
  mutate(word = reorder(word, logratio)) %>%
  ggplot(mapping = aes(x = word, y = logratio, fill = logratio < 0)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(y = "log odds ratio (David/Julia)") +
  scale_fill_discrete(name = "", labels = c("David", "Julia"))

Changes in words

Change over time. Which words’ frequencies have changed the fastest in our Twitter feeds? Or to state this another way, which words have we tweeted about at a higher or lower rate as time has passed?

# Word counts per person per month, together with the number of words that
# person used in the month (time_total) and the word's count over all rows
# (word_total). Only words with word_total above 30 are kept.
words_by_time <- tidy_tweets %>%
  filter(!str_detect(word, "^@")) %>%
  mutate(time_floor = floor_date(timestamp, unit = "1 month")) %>%
  count(time_floor, person, word) %>%
  ungroup() %>%
  rename(count = n) %>%
  group_by(person, time_floor) %>%
  mutate(time_total = sum(count)) %>%
  group_by(word) %>%
  mutate(word_total = sum(count)) %>%
  ungroup() %>%
  filter(word_total > 30)

words_by_time
# One row per (person, word) pair, with that pair's remaining columns nested
# in a `data` list-column. Grouping and then calling nest() replaces the
# unnamed negative selection nest(-word, -person), which newer tidyr releases
# deprecate; ungroup() returns a plain tibble either way.
nested_data <- words_by_time %>%
  group_by(person, word) %>%
  nest() %>%
  ungroup()

nested_data

Note the use of the purrr library and the map() function.

library(purrr)
# Fit one binomial regression per (person, word) row: how does the word's
# share of that month's words change with time_floor?
# A two-column binomial response is cbind(successes, failures), so the second
# column must be the month's words that were NOT this word.
nested_models <- nested_data %>%
  mutate(models = map(data, ~ glm(cbind(count, time_total - count) ~ time_floor,
                                  data = .,
                                  family = "binomial")))

nested_models

Note the use of the broom library.

library(broom)
# Turn every fitted model into a data frame of coefficients, keep the
# time_floor term, and adjust its p-values for multiple comparisons.
# The tidied output is stored in its own list-column before unnesting because
# newer tidyr releases no longer let unnest() create the column inline.
slopes <- nested_models %>%
  mutate(tidied = map(models, tidy)) %>%
  select(person, word, tidied) %>%
  unnest(tidied) %>%
  filter(term == "time_floor") %>%
  mutate(adjusted.p.value = p.adjust(p.value))

# Slopes whose adjusted p-value is below 0.1.
top_slopes <- slopes %>%
  filter(adjusted.p.value < 0.1)

top_slopes
# Monthly frequency of David's words that appear in top_slopes.
words_by_time %>%
  filter(person == "David") %>%
  inner_join(top_slopes, by = c("word", "person")) %>%
  ggplot(mapping = aes(x = time_floor, y = count / time_total, colour = word)) +
  geom_line(size = 1.3) +
  labs(x = NULL, y = "Word frequency")

# Monthly frequency of Julia's words that appear in top_slopes.
words_by_time %>%
  filter(person == "Julia") %>%
  inner_join(top_slopes, by = c("word", "person")) %>%
  ggplot(mapping = aes(x = time_floor, y = count / time_total, colour = word)) +
  geom_line(size = 1.3) +
  labs(x = NULL, y = "Word frequency")

Chapter 3

This chapter is about a key question in text mining: what is this document about? We might be able to answer this question by looking at the frequency of words in the document.

Term frequency (tf) is commonly used.

The statistic tf-idf is intended to measure how important a word is to a document in a collection (or corpus) of documents, for example, to one novel in a collection of novels or to one website in a collection of websites.

\(idf(\text{term}) = \ln{\left(\frac{n_{\text{documents}}}{n_{\text{documents containing term}}}\right)}\)

Term frequency in Jane Austen’s novels

Most common words.

library(dplyr)
library(janeaustenr)
library(tidytext)
# Count how often each word occurs in each book returned by austen_books().
book_words <- austen_books() %>%
  unnest_tokens(word, text) %>%
  count(book, word, sort = TRUE) %>%
  ungroup()

# Total number of words per book.
total_words <- book_words %>%
  group_by(book) %>%
  summarize(total = sum(n))

# Attach each book's total to its word counts. The key is named explicitly
# rather than inferred from the shared column names.
book_words <- left_join(book_words, total_words, by = "book")

book_words

Term frequency.

library(ggplot2)
# Distribution of term frequency (n / total) within each book, with the
# x-axis capped at 0.0009 and an independent y-axis per panel.
ggplot(data = book_words, mapping = aes(x = n / total, fill = book)) +
  geom_histogram(show.legend = FALSE) +
  xlim(NA, 0.0009) +
  facet_wrap(facets = ~book, ncol = 2, scales = "free_y")

Zipf’s Law

Zipf’s law states that the frequency that a word appears is inversely proportional to its rank.

# Rank every word within its book by descending count and compute its term
# frequency. Ranking on desc(n) makes the rank independent of the incoming
# row order (ties keep their order of appearance), and ungroup() avoids
# carrying the per-book grouping into later steps.
freq_by_rank <- book_words %>%
  group_by(book) %>%
  mutate(rank = row_number(desc(n)),
         `term frequency` = n / total) %>%
  ungroup()

freq_by_rank

Zipf’s law is often visualized by plotting rank on the x-axis and term frequency on the y-axis, on logarithmic scales. Plotting this way, an inversely proportional relationship will have a constant, negative slope.

# Term frequency against rank on log-log axes, one line per book.
ggplot(
  data = freq_by_rank,
  mapping = aes(x = rank, y = `term frequency`, colour = book)
) +
  geom_line(size = 1.1, alpha = 0.8, show.legend = FALSE) +
  scale_x_log10() +
  scale_y_log10()

# Restrict to ranks strictly between 10 and 500, then regress log10 term
# frequency on log10 rank.
rank_subset <- freq_by_rank %>%
  filter(rank > 10, rank < 500)

lm(formula = log10(`term frequency`) ~ log10(rank), data = rank_subset)

Call:
lm(formula = log10(`term frequency`) ~ log10(rank), data = rank_subset)

Coefficients:
(Intercept)  log10(rank)  
    -0.6226      -1.1125  
# Same log-log plot with a dashed grey line drawn first, using the rounded
# intercept (-0.62) and slope (-1.1) from the lm() fit.
ggplot(
  data = freq_by_rank,
  mapping = aes(x = rank, y = `term frequency`, colour = book)
) +
  geom_abline(intercept = -0.62, slope = -1.1, colour = "gray50", linetype = 2) +
  geom_line(size = 1.1, alpha = 0.8, show.legend = FALSE) +
  scale_x_log10() +
  scale_y_log10()

The bind_tf_idf function

Term Frequency. Inverse Document Frequency.

# Add tf, idf and tf_idf columns, treating each book as a document.
book_words <- bind_tf_idf(book_words, term = word, document = book, n = n)

book_words

Terms with high tf-idf in Jane Austen’s works.

# Rows ordered from highest to lowest tf-idf, without the total column.
book_words %>%
  arrange(desc(tf_idf)) %>%
  select(-total)
# Plot the 15 highest tf-idf words of each book, one panel per book.
book_words %>%
  arrange(desc(tf_idf)) %>%
  mutate(word = factor(word, levels = rev(unique(word)))) %>%
  group_by(book) %>%
  # Name the ranking column explicitly instead of letting top_n() fall back
  # to whichever column happens to be last in the data frame.
  top_n(15, tf_idf) %>%
  ungroup() %>%
  ggplot(aes(word, tf_idf, fill = book)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  facet_wrap(~book, ncol = 2, scales = "free") +
  coord_flip()

Physics Texts

library(gutenbergr)
# Download four Project Gutenberg texts by ID, keeping the author as a column.
physics <- gutenberg_download(
  gutenberg_id = c(37729, 14725, 13476, 5001),
  meta_fields = "author"
)

# Count how often each word occurs for each author.
physics_words <- physics %>%
  unnest_tokens(output = word, input = text) %>%
  count(author, word, sort = TRUE) %>%
  ungroup()

physics_words
# tf-idf with each author treated as a document. Words become a factor
# ordered by tf-idf, and authors a factor in a fixed display order.
plot_physics <- physics_words %>%
  bind_tf_idf(term = word, document = author, n = n) %>%
  arrange(desc(tf_idf)) %>%
  mutate(
    word = factor(word, levels = rev(unique(word))),
    author = factor(
      author,
      levels = c(
        "Galilei, Galileo",
        "Huygens, Christiaan",
        "Tesla, Nikola",
        "Einstein, Albert"
      )
    )
  )

# The 15 highest tf-idf words per author, one panel per author.
plot_physics %>%
  group_by(author) %>%
  top_n(n = 15, wt = tf_idf) %>%
  ungroup() %>%
  mutate(word = reorder(word, tf_idf)) %>%
  ggplot(mapping = aes(x = word, y = tf_idf, fill = author)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  facet_wrap(facets = ~author, ncol = 2, scales = "free") +
  coord_flip()

library(stringr)
# Show the lines of text that contain "eq.".
physics %>%
  select(text) %>%
  filter(str_detect(text, "eq\\."))

# Show the lines of text that contain "K1".
physics %>%
  select(text) %>%
  filter(str_detect(text, "K1"))

# Show the lines of text that contain "AK".
physics %>%
  select(text) %>%
  filter(str_detect(text, "AK"))
# Custom stop words to drop from the physics texts before recomputing tf-idf.
# tibble() replaces the deprecated data_frame() constructor.
mystopwords <- tibble(word = c("eq", "co", "rc", "ac", "ak", "bn",
                               "fig", "file", "cg", "cb", "cm"))

physics_words <- anti_join(physics_words, mystopwords, by = "word")

# Recompute tf-idf without those words and keep the 15 highest-scoring words
# per author, with authors in a fixed display order.
plot_physics <- physics_words %>%
  bind_tf_idf(word, author, n) %>%
  arrange(desc(tf_idf)) %>%
  mutate(word = factor(word, levels = rev(unique(word)))) %>%
  group_by(author) %>%
  top_n(15, tf_idf) %>%
  ungroup() %>%
  mutate(author = factor(author, levels = c("Galilei, Galileo",
                                            "Huygens, Christiaan",
                                            "Tesla, Nikola",
                                            "Einstein, Albert")))
# Horizontal bars of tf-idf for the words in plot_physics, one panel per
# author with independent axes.
ggplot(data = plot_physics, mapping = aes(x = word, y = tf_idf, fill = author)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  facet_wrap(facets = ~author, ncol = 2, scales = "free") +
  coord_flip()

LS0tCnRpdGxlOiAiQ2hhcHRlciAzICYgNyAtIHRmLWlkZiwgWmlwZiwgdHdpdHRlciBhcmNoaXZlcyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyBDaGFwdGVyIDcKCiMjIEdldHRpbmcgdGhlIGRhdGEgYW5kIGRpc3RyaWJ1dGlvbiBvZiB0d2VldHMKCkkgZ290IHRoZSBkYXRhIGZyb20gdGhlIGdpdGh1YiBmb3IgdGhlIGJvb2sgW3RpZHktdGV4dC1taW5pbmddKGh0dHBzOi8vZ2l0aHViLmNvbS9kZ3J0d28vdGlkeS10ZXh0LW1pbmluZykKCkZpcnN0IGRvd25sb2FkIHRoZWlyIHR3aXR0ZXIgYXJjaGl2ZXMuCgoKYGBge3J9CmxpYnJhcnkobHVicmlkYXRlKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkocmVhZHIpCgp0d2VldHNfanVsaWEgPC0gcmVhZF9jc3YoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9kZ3J0d28vdGlkeS10ZXh0LW1pbmluZy9tYXN0ZXIvZGF0YS90d2VldHNfanVsaWEuY3N2IikKdHdlZXRzX2RhdmUgPC0gcmVhZF9jc3YoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9kZ3J0d28vdGlkeS10ZXh0LW1pbmluZy9tYXN0ZXIvZGF0YS90d2VldHNfZGF2ZS5jc3YiKQoKdHdlZXRzIDwtIGJpbmRfcm93cyh0d2VldHNfanVsaWEgJT4lIAogICAgICAgICAgICAgICAgICAgICAgbXV0YXRlKHBlcnNvbiA9ICJKdWxpYSIpLAogICAgICAgICAgICAgICAgICAgIHR3ZWV0c19kYXZlICU+JSAKICAgICAgICAgICAgICAgICAgICAgIG11dGF0ZShwZXJzb24gPSAiRGF2aWQiKSkgJT4lCiAgICAgICAgICBtdXRhdGUodGltZXN0YW1wID0geW1kX2htcyh0aW1lc3RhbXApKQpgYGAKCmBgYHtyfQpnZ3Bsb3QodHdlZXRzLCBhZXMoeCA9IHRpbWVzdGFtcCwgZmlsbCA9IHBlcnNvbikpICsKICBnZW9tX2hpc3RvZ3JhbShwb3NpdGlvbiA9ICJpZGVudGl0eSIsIGJpbnMgPSAyMCwgc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGZhY2V0X3dyYXAofnBlcnNvbiwgbmNvbCA9IDEpCmBgYAoKIyMgV29yZCBGcmVxdWVuY2llcwoKVG8gZ2V0IHJpZCBvZiBzdG9wd29yZHMgYW5kIG90aGVyIHRoaW5ncywgdGhlIGF1dGhvcnMgdXNlIHNvbWUgcmVndWxhciBleHByZXNzc2lvbnMuICBJcyB0aGlzIGEgd2F5IHRvIHNlYXJjaCB0aHJvdWdoIHRoZSB0d2VldHMgYmFzZWQgb24gZGlmZmVyZW50IHRleHQgc3RyaW5ncy4KCmBgYHtyfQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHN0cmluZ3IpCgpyZXBsYWNlX3JlZyA8LSAiaHR0cHM6Ly90LmNvL1tBLVphLXpcXGRdK3xodHRwOi8vW0EtWmEtelxcZF0rfCZhbXA7fCZsdDt8Jmd0O3xSVHxodHRwcyIKdW5uZXN0X3JlZyA8LSAiKFteQS1aYS16X1xcZCNAJ118Jyg/IVtBLVphLXpfXFxkI0BdKSkiCgp0aWR5X3R3ZWV0cyA8LSB0d2VldHMgJT4lIAogIGZpbHRlcighc3RyX2RldGVjdCh0ZXh0LCAiXlJUIikpICU+JQogIG11dGF0ZSh0ZXh0ID0gc3RyX3JlcGxhY2VfYWxsKHRleHQsIHJlcGxhY2VfcmVnLCAiIikpICU+JQogIHVu
bmVzdF90b2tlbnMod29yZCwgdGV4dCwgdG9rZW4gPSAicmVnZXgiLCBwYXR0ZXJuID0gdW5uZXN0X3JlZykgJT4lCiAgZmlsdGVyKCF3b3JkICVpbiUgc3RvcF93b3JkcyR3b3JkLAogICAgICAgICBzdHJfZGV0ZWN0KHdvcmQsICJbYS16XSIpKQpgYGAKCgpgYGB7cn0KZnJlcXVlbmN5IDwtIHRpZHlfdHdlZXRzICU+JSAKICBncm91cF9ieShwZXJzb24pICU+JSAKICBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkgJT4lIAogIGxlZnRfam9pbih0aWR5X3R3ZWV0cyAlPiUgCiAgICAgICAgICAgICAgZ3JvdXBfYnkocGVyc29uKSAlPiUgCiAgICAgICAgICAgICAgc3VtbWFyaXNlKHRvdGFsID0gbigpKSkgJT4lCiAgbXV0YXRlKGZyZXEgPSBuL3RvdGFsKQoKZnJlcXVlbmN5CmBgYAoKYGBge3J9CmxpYnJhcnkodGlkeXIpCgpmcmVxdWVuY3kgPC0gZnJlcXVlbmN5ICU+JSAKICBzZWxlY3QocGVyc29uLCB3b3JkLCBmcmVxKSAlPiUgCiAgc3ByZWFkKHBlcnNvbiwgZnJlcSkgJT4lCiAgYXJyYW5nZShKdWxpYSwgRGF2aWQpCgpmcmVxdWVuY3kKYGBgCgoKYGBge3J9CmxpYnJhcnkoc2NhbGVzKQoKZ2dwbG90KGZyZXF1ZW5jeSwgYWVzKEp1bGlhLCBEYXZpZCkpICsKICBnZW9tX2ppdHRlcihhbHBoYSA9IDAuMSwgc2l6ZSA9IDIuNSwgd2lkdGggPSAwLjI1LCBoZWlnaHQgPSAwLjI1KSArCiAgZ2VvbV90ZXh0KGFlcyhsYWJlbCA9IHdvcmQpLCBjaGVja19vdmVybGFwID0gVFJVRSwgdmp1c3QgPSAxLjUpICsKICBzY2FsZV94X2xvZzEwKGxhYmVscyA9IHBlcmNlbnRfZm9ybWF0KCkpICsKICBzY2FsZV95X2xvZzEwKGxhYmVscyA9IHBlcmNlbnRfZm9ybWF0KCkpICsKICBnZW9tX2FibGluZShjb2xvciA9ICJyZWQiKQpgYGAKCgojIyBDb21wYXJpbmcgd29yZCB1c2FnZQoKYGBge3J9CnRpZHlfdHdlZXRzIDwtIHRpZHlfdHdlZXRzICU+JQogIGZpbHRlcih0aW1lc3RhbXAgPj0gYXMuRGF0ZSgiMjAxNi0wMS0wMSIpLAogICAgICAgICB0aW1lc3RhbXAgPCBhcy5EYXRlKCIyMDE3LTAxLTAxIikpCmBgYAoKVG8gY29tcGFyZSB3b3JkIHVzYWdlIHRoZSBmb2xsb3dpbmcgaXMgY2FsY3VsYXRlZC4KCiRcdGV4dHtsb2cgb2RkcyByYXRpb30gPSBcbG57XGxlZnQoXGZyYWN7XGxlZnRbXGZyYWN7bisxfXtcdGV4dHt0b3RhbH0rMX1ccmlnaHRdX1x0ZXh0e0RhdmlkfX17XGxlZnRbXGZyYWN7bisxfXtcdGV4dHt0b3RhbH0rMX1ccmlnaHRdX1x0ZXh0e0p1bGlhfX1ccmlnaHQpfSQKCkhlcmUgJG4kIGlzIHRoZSBudW1iZXIgb2YgdGltZXMgYSB3b3JkIGlzIHVzZWQgYW5kICR0b3RhbCQgaXMgdGhlIHRvdGFsIG51bWJlciBvZiB3b3JkcyBlYWNoIHVzZXIgaGFkLgoKTW9zdCBjb21tb24gd29yZHMuCgpgYGB7cn0Kd29yZF9yYXRpb3MgPC0gdGlkeV90d2VldHMgJT4lCiAgZmlsdGVyKCFzdHJfZGV0ZWN0KHdvcmQsICJeQCIpKSAlPiUKICBjb3VudCh3b3JkLCBwZXJzb24pICU+JQogIGZpbHRlcihzdW0obikgPj0gMTApICU+JQogIHVuZ3JvdXAoKSAl
PiUKICBzcHJlYWQocGVyc29uLCBuLCBmaWxsID0gMCkgJT4lCiAgbXV0YXRlX2lmKGlzLm51bWVyaWMsIGZ1bnMoKC4gKyAxKSAvIHN1bSguICsgMSkpKSAlPiUKICBtdXRhdGUobG9ncmF0aW8gPSBsb2coRGF2aWQgLyBKdWxpYSkpICU+JQogIGFycmFuZ2UoZGVzYyhsb2dyYXRpbykpCmBgYAoKYGBge3J9CndvcmRfcmF0aW9zICU+JSAKICBhcnJhbmdlKGFicyhsb2dyYXRpbykpCmBgYAoKV2hpY2ggd29yZHMgYXJlIG1vc3QgbGlrZWx5IHRvIGJlIGZyb20gSnVsaWHigJlzIGFjY291bnQgb3IgZnJvbSBEYXZpZOKAmXMgYWNjb3VudD8gCgpgYGB7cn0Kd29yZF9yYXRpb3MgJT4lCiAgZ3JvdXBfYnkobG9ncmF0aW8gPCAwKSAlPiUKICB0b3BfbigxNSwgYWJzKGxvZ3JhdGlvKSkgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBsb2dyYXRpbykpICU+JQogIGdncGxvdChhZXMod29yZCwgbG9ncmF0aW8sIGZpbGwgPSBsb2dyYXRpbyA8IDApKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGNvb3JkX2ZsaXAoKSArCiAgeWxhYigibG9nIG9kZHMgcmF0aW8gKERhdmlkL0p1bGlhKSIpICsKICBzY2FsZV9maWxsX2Rpc2NyZXRlKG5hbWUgPSAiIiwgbGFiZWxzID0gYygiRGF2aWQiLCAiSnVsaWEiKSkKYGBgCgojIyBDaGFuZ2VzIGluIHdvcmRzCgpDaGFnZSBvdmVyIHRpbWUuICBXaGljaCB3b3Jkc+KAmSBmcmVxdWVuY2llcyBoYXZlIGNoYW5nZWQgdGhlIGZhc3Rlc3QgaW4gb3VyIFR3aXR0ZXIgZmVlZHM/IE9yIHRvIHN0YXRlIHRoaXMgYW5vdGhlciB3YXksIHdoaWNoIHdvcmRzIGhhdmUgd2UgdHdlZXRlZCBhYm91dCBhdCBhIGhpZ2hlciBvciBsb3dlciByYXRlIGFzIHRpbWUgaGFzIHBhc3NlZD8gCgpgYGB7cn0Kd29yZHNfYnlfdGltZSA8LSB0aWR5X3R3ZWV0cyAlPiUKICBmaWx0ZXIoIXN0cl9kZXRlY3Qod29yZCwgIl5AIikpICU+JQogIG11dGF0ZSh0aW1lX2Zsb29yID0gZmxvb3JfZGF0ZSh0aW1lc3RhbXAsIHVuaXQgPSAiMSBtb250aCIpKSAlPiUKICBjb3VudCh0aW1lX2Zsb29yLCBwZXJzb24sIHdvcmQpICU+JQogIHVuZ3JvdXAoKSAlPiUKICBncm91cF9ieShwZXJzb24sIHRpbWVfZmxvb3IpICU+JQogIG11dGF0ZSh0aW1lX3RvdGFsID0gc3VtKG4pKSAlPiUKICBncm91cF9ieSh3b3JkKSAlPiUKICBtdXRhdGUod29yZF90b3RhbCA9IHN1bShuKSkgJT4lCiAgdW5ncm91cCgpICU+JQogIHJlbmFtZShjb3VudCA9IG4pICU+JQogIGZpbHRlcih3b3JkX3RvdGFsID4gMzApCgp3b3Jkc19ieV90aW1lCmBgYAoKYGBge3J9Cm5lc3RlZF9kYXRhIDwtIHdvcmRzX2J5X3RpbWUgJT4lCiAgbmVzdCgtd29yZCwgLXBlcnNvbikgCgpuZXN0ZWRfZGF0YQpgYGAKCk5vdGUgdGhlIHVzZSBvZiB0aGUgcHVycnIgbGlicmFyeSBhbmQgdGhlIG1hcCgpIGZ1bmN0aW9uLgoKYGBge3J9CmxpYnJhcnkocHVycnIpCgpuZXN0ZWRfbW9kZWxzIDwtIG5lc3RlZF9kYXRhICU+JQogIG11
dGF0ZShtb2RlbHMgPSBtYXAoZGF0YSwgfiBnbG0oY2JpbmQoY291bnQsIHRpbWVfdG90YWwpIH4gdGltZV9mbG9vciwgLiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmYW1pbHkgPSAiYmlub21pYWwiKSkpCgpuZXN0ZWRfbW9kZWxzCmBgYAoKTm90ZSB0aGUgdXNlIG9mIHRoZSBicm9vbSBsaWJyYXJ5LgoKYGBge3J9CmxpYnJhcnkoYnJvb20pCgpzbG9wZXMgPC0gbmVzdGVkX21vZGVscyAlPiUKICB1bm5lc3QobWFwKG1vZGVscywgdGlkeSkpICU+JQogIGZpbHRlcih0ZXJtID09ICJ0aW1lX2Zsb29yIikgJT4lCiAgbXV0YXRlKGFkanVzdGVkLnAudmFsdWUgPSBwLmFkanVzdChwLnZhbHVlKSkKYGBgCgpgYGB7cn0KdG9wX3Nsb3BlcyA8LSBzbG9wZXMgJT4lIAogIGZpbHRlcihhZGp1c3RlZC5wLnZhbHVlIDwgMC4xKQoKdG9wX3Nsb3BlcwpgYGAKCmBgYHtyfQp3b3Jkc19ieV90aW1lICU+JQogIGlubmVyX2pvaW4odG9wX3Nsb3BlcywgYnkgPSBjKCJ3b3JkIiwgInBlcnNvbiIpKSAlPiUKICBmaWx0ZXIocGVyc29uID09ICJEYXZpZCIpICU+JQogIGdncGxvdChhZXModGltZV9mbG9vciwgY291bnQvdGltZV90b3RhbCwgY29sb3IgPSB3b3JkKSkgKwogIGdlb21fbGluZShzaXplID0gMS4zKSArCiAgbGFicyh4ID0gTlVMTCwgeSA9ICJXb3JkIGZyZXF1ZW5jeSIpCmBgYAoKYGBge3J9CndvcmRzX2J5X3RpbWUgJT4lCiAgaW5uZXJfam9pbih0b3Bfc2xvcGVzLCBieSA9IGMoIndvcmQiLCAicGVyc29uIikpICU+JQogIGZpbHRlcihwZXJzb24gPT0gIkp1bGlhIikgJT4lCiAgZ2dwbG90KGFlcyh0aW1lX2Zsb29yLCBjb3VudC90aW1lX3RvdGFsLCBjb2xvciA9IHdvcmQpKSArCiAgZ2VvbV9saW5lKHNpemUgPSAxLjMpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gIldvcmQgZnJlcXVlbmN5IikKYGBgCgoKCiMgQ2hhcHRlciAzCgpUaGlzIGNoYXB0ZXIgaXMgYWJvdXQgYSBrZXkgcXVlc3Rpb24gaW4gdGV4dCBtaW5pbmcsIHdoYXQgaXMgdGhpcyBkb2N1bWVudCBhYm91dD8gIFdlIG1pZ2h0IGJlIGFibGUgdG8gYW5zd2VyIHRoaXMgcXVlc3Rpb25zIGJ5IGxvb2tpbmcgYSB0aGUgZnJlcXVlbmN5IG9mIHdvcmRzIGluIHRoZSBkb2N1bWVudC4KClRlcm0gZnJlcXVlbmN5ICh0ZikgaXMgY29tbW9ubHkgdXNlZC4KClRoZSBzdGF0aXN0aWMgdGYtaWRmIGlzIGludGVuZGVkIHRvIG1lYXN1cmUgaG93IGltcG9ydGFudCBhIHdvcmQgaXMgdG8gYSBkb2N1bWVudCBpbiBhIGNvbGxlY3Rpb24gKG9yIGNvcnB1cykgb2YgZG9jdW1lbnRzLCBmb3IgZXhhbXBsZSwgdG8gb25lIG5vdmVsIGluIGEgY29sbGVjdGlvbiBvZiBub3ZlbHMgb3IgdG8gb25lIHdlYnNpdGUgaW4gYSBjb2xsZWN0aW9uIG9mIHdlYnNpdGVzLgoKJGlkZihcdGV4dHt0ZXJtfSkgPSBcbG57XGxlZnQoXGZyYWN7bl97XHRleHR7ZG9jdW1lbnRzfX19e25fe1x0ZXh0e2RvY3VtZW50cyBjb250YWluaW5nIHRlcm19fX1ccmlnaHQpfSQKCgojIFRlcm0gZnJlcXVl
bmN5IGluIEphbmUgQXVzdGVuJ3Mgbm92ZWxzCgpNb3N0IGNvbW1vbiB3b3Jkcy4KCmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KGphbmVhdXN0ZW5yKQpsaWJyYXJ5KHRpZHl0ZXh0KQoKYm9va193b3JkcyA8LSBhdXN0ZW5fYm9va3MoKSAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIHRleHQpICU+JQogIGNvdW50KGJvb2ssIHdvcmQsIHNvcnQgPSBUUlVFKSAlPiUKICB1bmdyb3VwKCkKCnRvdGFsX3dvcmRzIDwtIGJvb2tfd29yZHMgJT4lIAogIGdyb3VwX2J5KGJvb2spICU+JSAKICBzdW1tYXJpemUodG90YWwgPSBzdW0obikpCgpib29rX3dvcmRzIDwtIGxlZnRfam9pbihib29rX3dvcmRzLCB0b3RhbF93b3JkcykKCmJvb2tfd29yZHMKYGBgCgpUZXJtIGZyZXF1ZW5jeS4KCmBgYHtyfQpsaWJyYXJ5KGdncGxvdDIpCgpnZ3Bsb3QoYm9va193b3JkcywgYWVzKG4vdG90YWwsIGZpbGwgPSBib29rKSkgKwogIGdlb21faGlzdG9ncmFtKHNob3cubGVnZW5kID0gRkFMU0UpICsKICB4bGltKE5BLCAwLjAwMDkpICsKICBmYWNldF93cmFwKH5ib29rLCBuY29sID0gMiwgc2NhbGVzID0gImZyZWVfeSIpCmBgYAoKIyBaaXBmJ3MgTGF3CgpaaXBm4oCZcyBsYXcgc3RhdGVzIHRoYXQgdGhlIGZyZXF1ZW5jeSB0aGF0IGEgd29yZCBhcHBlYXJzIGlzIGludmVyc2VseSBwcm9wb3J0aW9uYWwgdG8gaXRzIHJhbmsuCgpgYGB7cn0KZnJlcV9ieV9yYW5rIDwtIGJvb2tfd29yZHMgJT4lIAogIGdyb3VwX2J5KGJvb2spICU+JSAKICBtdXRhdGUocmFuayA9IHJvd19udW1iZXIoKSwgCiAgICAgICAgIGB0ZXJtIGZyZXF1ZW5jeWAgPSBuL3RvdGFsKQoKZnJlcV9ieV9yYW5rCmBgYAoKWmlwZuKAmXMgbGF3IGlzIG9mdGVuIHZpc3VhbGl6ZWQgYnkgcGxvdHRpbmcgcmFuayBvbiB0aGUgeC1heGlzIGFuZCB0ZXJtIGZyZXF1ZW5jeSBvbiB0aGUgeS1heGlzLCBvbiBsb2dhcml0aG1pYyBzY2FsZXMuIFBsb3R0aW5nIHRoaXMgd2F5LCBhbiBpbnZlcnNlbHkgcHJvcG9ydGlvbmFsIHJlbGF0aW9uc2hpcCB3aWxsIGhhdmUgYSBjb25zdGFudCwgbmVnYXRpdmUgc2xvcGUuCgpgYGB7cn0KZnJlcV9ieV9yYW5rICU+JSAKICBnZ3Bsb3QoYWVzKHJhbmssIGB0ZXJtIGZyZXF1ZW5jeWAsIGNvbG9yID0gYm9vaykpICsgCiAgZ2VvbV9saW5lKHNpemUgPSAxLjEsIGFscGhhID0gMC44LCBzaG93LmxlZ2VuZCA9IEZBTFNFKSArIAogIHNjYWxlX3hfbG9nMTAoKSArCiAgc2NhbGVfeV9sb2cxMCgpCmBgYAoKYGBge3J9CnJhbmtfc3Vic2V0IDwtIGZyZXFfYnlfcmFuayAlPiUgCiAgZmlsdGVyKHJhbmsgPCA1MDAsCiAgICAgICAgIHJhbmsgPiAxMCkKCmxtKGxvZzEwKGB0ZXJtIGZyZXF1ZW5jeWApIH4gbG9nMTAocmFuayksIGRhdGEgPSByYW5rX3N1YnNldCkKYGBgCgpgYGB7cn0KZnJlcV9ieV9yYW5rICU+JSAKICBnZ3Bsb3QoYWVzKHJhbmssIGB0ZXJtIGZyZXF1ZW5jeWAsIGNvbG9yID0gYm9vaykpICsgCiAgZ2VvbV9hYmxpbmUoaW50ZXJjZXB0ID0gLTAu
NjIsIHNsb3BlID0gLTEuMSwgY29sb3IgPSAiZ3JheTUwIiwgbGluZXR5cGUgPSAyKSArCiAgZ2VvbV9saW5lKHNpemUgPSAxLjEsIGFscGhhID0gMC44LCBzaG93LmxlZ2VuZCA9IEZBTFNFKSArIAogIHNjYWxlX3hfbG9nMTAoKSArCiAgc2NhbGVfeV9sb2cxMCgpCmBgYAoKIyMgVGhlIGJpbmRfdGZfaWRmIGZ1bmN0aW9uCgpUZXJtIEZyZXF1ZW5jeS4gIEludmVyc2UgRG9jdW1lbnQgRnJlcXVlbmN5LgoKYGBge3J9CmJvb2tfd29yZHMgPC0gYm9va193b3JkcyAlPiUKICBiaW5kX3RmX2lkZih3b3JkLCBib29rLCBuKQpib29rX3dvcmRzCmBgYAoKVGVybXMgd2l0aCBoaWdoIHRmLWlkZiBpbiBKYW5lIEF1c3RlbuKAmXMgd29ya3MuCgpgYGB7cn0KYm9va193b3JkcyAlPiUKICBzZWxlY3QoLXRvdGFsKSAlPiUKICBhcnJhbmdlKGRlc2ModGZfaWRmKSkKYGBgCgoKCmBgYHtyfQpib29rX3dvcmRzICU+JQogIGFycmFuZ2UoZGVzYyh0Zl9pZGYpKSAlPiUKICBtdXRhdGUod29yZCA9IGZhY3Rvcih3b3JkLCBsZXZlbHMgPSByZXYodW5pcXVlKHdvcmQpKSkpICU+JSAKICBncm91cF9ieShib29rKSAlPiUgCiAgdG9wX24oMTUpICU+JSAKICB1bmdyb3VwICU+JQogIGdncGxvdChhZXMod29yZCwgdGZfaWRmLCBmaWxsID0gYm9vaykpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgbGFicyh4ID0gTlVMTCwgeSA9ICJ0Zi1pZGYiKSArCiAgZmFjZXRfd3JhcCh+Ym9vaywgbmNvbCA9IDIsIHNjYWxlcyA9ICJmcmVlIikgKwogIGNvb3JkX2ZsaXAoKQpgYGAKCiMgUGh5c2ljcyBUZXh0cwoKYGBge3J9CmxpYnJhcnkoZ3V0ZW5iZXJncikKcGh5c2ljcyA8LSBndXRlbmJlcmdfZG93bmxvYWQoYygzNzcyOSwgMTQ3MjUsIDEzNDc2LCA1MDAxKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1ldGFfZmllbGRzID0gImF1dGhvciIpCmBgYAoKCmBgYHtyfQpwaHlzaWNzX3dvcmRzIDwtIHBoeXNpY3MgJT4lCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCB0ZXh0KSAlPiUKICBjb3VudChhdXRob3IsIHdvcmQsIHNvcnQgPSBUUlVFKSAlPiUKICB1bmdyb3VwKCkKCnBoeXNpY3Nfd29yZHMKYGBgCgoKYGBge3J9CnBsb3RfcGh5c2ljcyA8LSBwaHlzaWNzX3dvcmRzICU+JQogIGJpbmRfdGZfaWRmKHdvcmQsIGF1dGhvciwgbikgJT4lCiAgYXJyYW5nZShkZXNjKHRmX2lkZikpICU+JQogIG11dGF0ZSh3b3JkID0gZmFjdG9yKHdvcmQsIGxldmVscyA9IHJldih1bmlxdWUod29yZCkpKSkgJT4lCiAgbXV0YXRlKGF1dGhvciA9IGZhY3RvcihhdXRob3IsIGxldmVscyA9IGMoIkdhbGlsZWksIEdhbGlsZW8iLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJIdXlnZW5zLCBDaHJpc3RpYWFuIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIlRlc2xhLCBOaWtvbGEiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJFaW5zdGVpbiwg
QWxiZXJ0IikpKQoKcGxvdF9waHlzaWNzICU+JSAKICBncm91cF9ieShhdXRob3IpICU+JSAKICB0b3BfbigxNSwgdGZfaWRmKSAlPiUgCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCB0Zl9pZGYpKSAlPiUKICBnZ3Bsb3QoYWVzKHdvcmQsIHRmX2lkZiwgZmlsbCA9IGF1dGhvcikpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgbGFicyh4ID0gTlVMTCwgeSA9ICJ0Zi1pZGYiKSArCiAgZmFjZXRfd3JhcCh+YXV0aG9yLCBuY29sID0gMiwgc2NhbGVzID0gImZyZWUiKSArCiAgY29vcmRfZmxpcCgpCgpgYGAKCgpgYGB7cn0KbGlicmFyeShzdHJpbmdyKQoKcGh5c2ljcyAlPiUgCiAgZmlsdGVyKHN0cl9kZXRlY3QodGV4dCwgImVxXFwuIikpICU+JSAKICBzZWxlY3QodGV4dCkKYGBgCgoKYGBge3J9CnBoeXNpY3MgJT4lIAogIGZpbHRlcihzdHJfZGV0ZWN0KHRleHQsICJLMSIpKSAlPiUgCiAgc2VsZWN0KHRleHQpCmBgYAoKCgpgYGB7cn0KcGh5c2ljcyAlPiUgCiAgZmlsdGVyKHN0cl9kZXRlY3QodGV4dCwgIkFLIikpICU+JSAKICBzZWxlY3QodGV4dCkKYGBgCgoKCmBgYHtyfQpteXN0b3B3b3JkcyA8LSBkYXRhX2ZyYW1lKHdvcmQgPSBjKCJlcSIsICJjbyIsICJyYyIsICJhYyIsICJhayIsICJibiIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJmaWciLCAiZmlsZSIsICJjZyIsICJjYiIsICJjbSIpKQpwaHlzaWNzX3dvcmRzIDwtIGFudGlfam9pbihwaHlzaWNzX3dvcmRzLCBteXN0b3B3b3JkcywgYnkgPSAid29yZCIpCnBsb3RfcGh5c2ljcyA8LSBwaHlzaWNzX3dvcmRzICU+JQogIGJpbmRfdGZfaWRmKHdvcmQsIGF1dGhvciwgbikgJT4lCiAgYXJyYW5nZShkZXNjKHRmX2lkZikpICU+JQogIG11dGF0ZSh3b3JkID0gZmFjdG9yKHdvcmQsIGxldmVscyA9IHJldih1bmlxdWUod29yZCkpKSkgJT4lCiAgZ3JvdXBfYnkoYXV0aG9yKSAlPiUgCiAgdG9wX24oMTUsIHRmX2lkZikgJT4lCiAgdW5ncm91cCAlPiUKICBtdXRhdGUoYXV0aG9yID0gZmFjdG9yKGF1dGhvciwgbGV2ZWxzID0gYygiR2FsaWxlaSwgR2FsaWxlbyIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIkh1eWdlbnMsIENocmlzdGlhYW4iLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJUZXNsYSwgTmlrb2xhIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiRWluc3RlaW4sIEFsYmVydCIpKSkKCmdncGxvdChwbG90X3BoeXNpY3MsIGFlcyh3b3JkLCB0Zl9pZGYsIGZpbGwgPSBhdXRob3IpKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGxhYnMoeCA9IE5VTEwsIHkgPSAidGYtaWRmIikgKwogIGZhY2V0X3dyYXAofmF1dGhvciwgbmNvbCA9IDIsIHNjYWxlcyA9ICJmcmVlIikgKwogIGNvb3JkX2ZsaXAoKQpgYGAKCgo=