使用Text Mining with R - A Tidy Approach ch1~ch6 分析Hilton Hawaiian Village Tripadvisor review

Data來源:https://github.com/susanli2016/Data-Analysis-with-R/blob/master/Hilton_Hawaiian_Village_Waikiki_Beach_Resort-Honolulu_Oahu_Hawaii__en.csv

Ch1.The tidy text format

library(dplyr)
library(readr)
library(lubridate)
library(ggplot2)
library(tidytext)
library(tidyverse)
library(stringr)
library(tidyr)
library(scales)
library(broom)
library(purrr)
library(widyr)
library(igraph)
library(ggraph)
library(SnowballC)
library(wordcloud)
library(reshape2)
library(topicmodels)
theme_set(theme_minimal())
# Load the TripAdvisor reviews. Both columns are declared as character up
# front (the same specification readr would otherwise guess and report), so
# parsing does not depend on type inference. review_date is converted to a
# Date further down.
df <- read_csv(
  "Hilton_Hawaiian_Village_Waikiki_Beach_Resort-Honolulu_Oahu_Hawaii__en.csv",
  col_types = cols(
    review_body = col_character(),
    review_date = col_character()
  )
)
Parsed with column specification:
cols(
  review_body = col_character(),
  review_date = col_character()
)
# Keep only the rows in which every column is present.
df <- df[complete.cases(df), ]
# Sequential review id. seq_len() yields an empty vector when no rows remain,
# whereas 1:nrow(df) would yield c(1, 0) and break the assignment.
df$id <- seq_len(nrow(df))
# Parse the date strings as day - month name - two-digit year.
# NOTE(review): %B matches month names in the session locale; confirm the
# script is run under an English locale.
df$review_date <- as.Date(df$review_date, format = "%d-%B-%y")
# Size of the cleaned data and the first / last review date.
dim(df)
min(df$review_date)
max(df$review_date)
[1] 13701     3
[1] "2002-03-21"
[1] "2018-08-02"
# Strip every digit from the review text so that numbers do not flood the
# word-relationship graphs drawn later.
df$review_body <- gsub(
  pattern = "[[:digit:]]",
  replacement = "",
  x = df$review_body
)
# Tokenise: one row per word of each review.
tidy_df <- unnest_tokens(df, output = word, input = review_body)

# Remove stop words. The join key is spelled out so the match cannot silently
# change if the two tables ever share another column name.
data(stop_words)
tidy_df <- tidy_df %>%
  anti_join(stop_words, by = "word")
Joining, by = "word"

全部評論中的字頻

# Frequency table of the remaining words, most frequent first.
count(tidy_df, word, sort = TRUE)
# Horizontal bar chart of the words that occur more than 5500 times,
# ordered by frequency.
tidy_df %>%
  count(word, sort = TRUE) %>%
  filter(n > 5500) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(mapping = aes(x = word, y = n)) +
  geom_col() +
  labs(x = NULL) +
  coord_flip()

Ch2.Sentiment analysis

使用字典nrc 查看在評論中出現哪些joy的字

# Words that the NRC lexicon tags as "joy".
nrc_joy <- get_sentiments("nrc") %>%
  filter(sentiment == "joy")
# Count how often each joy word occurs in the reviews. The join key is given
# explicitly instead of relying on the shared column names.
# Note that beach, food, diamond, ... are also tagged as joy.
tidy_df %>%
  inner_join(nrc_joy, by = "word") %>%
  count(word, sort = TRUE)
Joining, by = "word"

可以看出主要是關於飯店的整潔clean,友善friendly,helpful,環境或建築的美麗pretty….等

原tidytext文章範例是以小說篇章編號為x軸,y軸為情緒分數來做圖,表現小說情緒依據劇情演進而產生的變化
不過hotel review就要變成以x軸為日期了

# Review dates run from 2002-03-21 to 2018-08-02.
summary(tidy_df[["review_date"]])
        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2002-03-21" "2012-01-05" "2014-04-28" "2013-08-26" "2016-03-14" "2018-08-02" 
# Net Bing sentiment per review: count positive and negative words for each
# review id, spread them into two columns (0 when absent) and subtract.
# The join key is explicit so the match does not depend on shared column names.
sentiment <- tidy_df %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(id, sentiment, review_date) %>%
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative)
Joining, by = "word"
# The `sentiment` column holds each review's net score (positive - negative).
# Average that score per calendar year.
sen_byyear <- sentiment %>%
  group_by(year(review_date)) %>%
  summarise(sentiment = mean(sentiment)) %>%
  as.data.frame()
# Anchor every year at 1 January. as.Date(x, format = "%Y") fills the missing
# month and day from the current date, so the x positions (and the plot)
# would change with the day the script is run.
sen_byyear$`year(review_date)` <- as.Date(
  paste0(sen_byyear$`year(review_date)`, "-01-01")
)
# Line chart of the yearly mean sentiment with one axis break per year.
ggplot(sen_byyear, aes(`year(review_date)`, sentiment)) +
  geom_line() +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")

顯示評論平均情緒分數在2006年時有下降趨勢,在最近的2018甚至是最低點

most common positive and negative words

# Occurrences of every Bing-lexicon word, together with its polarity, most
# frequent first. The join key is explicit; the trailing ungroup() was a
# no-op because count() on ungrouped input already returns ungrouped data.
bing_word_counts <- tidy_df %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE)
Joining, by = "word"
bing_word_counts
# Ten most frequent words per polarity, one facet each. The ranking column
# is named explicitly instead of relying on top_n() picking the last column.
bing_word_counts %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment",
       x = NULL) +
  coord_flip()
Selecting by n

word cloud

# Word cloud of up to 100 words sized by frequency. tidy_df already has the
# stop words removed, so the second anti_join was redundant and is dropped.
tidy_df %>%
  count(word) %>%
  with(wordcloud(word, n, max.words = 100))
Joining, by = "word"

# Comparison cloud of positive versus negative Bing words. acast() reshapes
# the counts into a word x sentiment matrix (0 where a word lacks a polarity).
# The join key is explicit rather than inferred from shared column names.
tidy_df %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("gray20", "gray80"),
                   max.words = 100)
Joining, by = "word"

Ch3.Analyzing word and document frequency: tf-idf

# Term frequency before stop words are removed: count every word per review.
df_words <- df %>%
  unnest_tokens(word, review_body) %>%
  count(id, word, sort = TRUE) %>%
  ungroup()
# Total number of words in each review.
total_words <- df_words %>%
  group_by(id) %>%
  summarize(total = sum(n))
# Attach the per-review total to every (review, word) row. The key is explicit
# so the join does not depend on which column names happen to be shared.
book_words <- left_join(df_words, total_words, by = "id")
Joining, by = "id"
# Print the per-review word counts together with each review's total.
book_words

n是word詞頻,total是id review的總字數

Zipf’s law

# Within each review, number the rows in their current order (rank) and
# express each word count as a share of the review's total words.
freq_by_rank <- group_by(book_words, id) %>%
  mutate(
    rank = row_number(),
    `term frequency` = n / total
  )
freq_by_rank

以一篇評論當作一篇文本來看,常見的stop words幾乎都是rank前幾名(相較其他字詞,出現頻率的rank)

# Keep the ranks strictly between 10 and 500.
rank_subset <- filter(freq_by_rank, rank > 10, rank < 500)
# Fit log10(term frequency) against log10(rank) on that subset.
lm(log10(`term frequency`) ~ log10(rank), data = rank_subset)

Call:
lm(formula = log10(`term frequency`) ~ log10(rank), data = rank_subset)

Coefficients:
(Intercept)  log10(rank)  
    -0.8067      -0.8256  

bind tf-idf function

# Add tf, idf and tf-idf columns, treating each review (id) as one document.
book_words <- book_words %>%
  bind_tf_idf(word, id, n)
book_words
# Rows with the highest tf-idf first.
book_words %>%
  arrange(desc(tf_idf))
# Bar chart of the 15 highest tf-idf rows. The ranking column is passed to
# top_n() explicitly (it previously fell back to the last column) and
# ungroup() is written as a call.
book_words %>%
  arrange(desc(tf_idf)) %>%
  mutate(word = factor(word, levels = rev(unique(word)))) %>%
  top_n(15, tf_idf) %>%
  ungroup() %>%
  ggplot(aes(word, tf_idf)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  coord_flip()
Selecting by tf_idf

Ch4.Relationships between words: n-grams and correlations

# Tokenise every review into overlapping two-word sequences.
# NOTE(review): depending on the installed tidytext/tokenizers version, a
# review with fewer than two tokens yields an NA bigram; such rows would pass
# the later stop-word filter (NA is not in the stop list), so they are
# dropped here. The filter is a no-op when no NA rows are produced.
df_bigrams <- df %>%
  unnest_tokens(bigram, review_body, token = "ngrams", n = 2) %>%
  filter(!is.na(bigram))
df_bigrams
# Most frequent bigrams, stop words included.
df_bigrams %>%
  count(bigram, sort = TRUE)
# Split each bigram into its first and second word.
bigrams_separated <- separate(
  df_bigrams,
  bigram,
  into = c("word1", "word2"),
  sep = " "
)
# Keep only the bigrams in which neither word is a stop word.
bigrams_filtered <- bigrams_separated %>%
  filter(!(word1 %in% stop_words$word | word2 %in% stop_words$word))
# Bigram counts after stop-word filtering, most frequent first.
bigram_counts <- count(bigrams_filtered, word1, word2, sort = TRUE)
bigram_counts
# Glue the two words back together into a single bigram column.
bigrams_united <- unite(bigrams_filtered, bigram, word1, word2, sep = " ")
bigrams_united
# Most frequent three-word sequences in which no word is a stop word.
df %>%
  unnest_tokens(trigram, review_body, token = "ngrams", n = 3) %>%
  separate(trigram, into = c("word1", "word2", "word3"), sep = " ") %>%
  filter(
    !(word1 %in% stop_words$word |
        word2 %in% stop_words$word |
        word3 %in% stop_words$word)
  ) %>%
  count(word1, word2, word3, sort = TRUE)
# tf-idf of every bigram, with each review (id) as one document, sorted from
# the highest tf-idf down.
bigram_tf_idf <- count(bigrams_united, id, bigram) %>%
  bind_tf_idf(term = bigram, document = id, n = n) %>%
  arrange(desc(tf_idf))
bigram_tf_idf

排在not後的字詞

# AFINN lexicon: one numeric sentiment score per word.
# NOTE(review): this code expects the score column to be called `score`;
# confirm that this matches the installed lexicon version.
AFINN <- get_sentiments("afinn")
# Scored words that directly follow "not", with how often each occurs.
not_words <- bigrams_separated %>%
  filter(word1 == "not") %>%
  inner_join(AFINN, by = c(word2 = "word")) %>%
  count(word2, score, sort = TRUE) %>%
  ungroup()
not_words
# The 20 words whose score times frequency is largest in absolute value,
# i.e. the ones that mislead a word-level sentiment count the most.
not_words %>%
  mutate(contribution = n * score) %>%
  arrange(desc(abs(contribution))) %>%
  head(20) %>%
  mutate(word2 = reorder(word2, contribution)) %>%
  ggplot(aes(x = word2, y = contribution, fill = contribution > 0)) +
  geom_col(show.legend = FALSE) +
  labs(
    x = "Words preceded by \"not\"",
    y = "Sentiment score * number of occurrences"
  ) +
  coord_flip()

不只not代表否定,加入其他否定字詞

# Negation words to look for in the first position of a bigram.
negation_words <- c("not", "no", "never", "without")
# Scored words that directly follow any of the negation words.
negated_words <- bigrams_separated %>%
  filter(word1 %in% negation_words) %>%
  inner_join(AFINN, by = c(word2 = "word")) %>%
  count(word1, word2, score, sort = TRUE) %>%
  ungroup()
# The 20 (negation, word) pairs with the largest absolute score times
# frequency, drawn in one facet per negation word.
negated_words %>%
  mutate(contribution = n * score) %>%
  arrange(desc(abs(contribution))) %>%
  head(20) %>%
  mutate(word2 = reorder(word2, contribution)) %>%
  ggplot(aes(x = word2, y = contribution, fill = contribution > 0)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~word1, scales = "free_y") +
  labs(
    x = "Words preceded by \"negated words\"",
    y = "Sentiment score * number of occurrences"
  ) +
  coord_flip()

#save.image("tidytext_hawai.RData")

Visualizing a network of bigrams with ggraph

# Directed graph from the bigrams seen more than 90 times: word1 -> word2,
# with the count n carried along as an edge attribute.
bigram_graph <- graph_from_data_frame(filter(bigram_counts, n > 90))
bigram_graph
IGRAPH 314b2cf DN-- 215 189 -- 
+ attr: name (v/c), n (e/n)
+ edges from 314b2cf (vertex names):
 [1] rainbow ->tower     hawaiian->village   hilton  ->hawaiian 
 [4] ocean   ->view      diamond ->head      waikiki ->beach    
 [7] tapa    ->tower     ali'i   ->tower     front   ->desk     
[10] resort  ->fee       walking ->distance  friday  ->night    
[13] abc     ->store     ala     ->moana     kalia   ->tower    
[16] hilton  ->honors    ocean   ->front     head    ->tower    
[19] highly  ->recommend abc     ->stores    super   ->pool     
[22] minute  ->walk      alii    ->tower     tropics ->bar      
+ ... omitted several edges
library(ggraph)
# Seed the RNG before the "fr" layout is computed.
set.seed(2017)
# Digits are stripped from review_body during preprocessing, which keeps
# numeric tokens out of this graph.
ggraph(graph = bigram_graph, layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  geom_node_text(mapping = aes(label = name), vjust = 1, hjust = 1)

# Seed the RNG before the "fr" layout is computed.
set.seed(2016)
# Closed arrow head used to show the direction word1 -> word2.
a <- grid::arrow(type = "closed", length = unit(0.15, "inches"))
# Same network, with edge transparency mapped to the bigram count and the
# arrows ending slightly before each node.
ggraph(graph = bigram_graph, layout = "fr") +
  geom_edge_link(
    mapping = aes(edge_alpha = n),
    show.legend = FALSE,
    arrow = a,
    end_cap = circle(0.07, "inches")
  ) +
  geom_node_point(color = "lightblue", size = 5) +
  geom_node_text(mapping = aes(label = name), vjust = 1, hjust = 1) +
  theme_void()

Counting and correlating among reviews

library(widyr)
# Count how often each pair of words co-occurs within the same review (id),
# most frequent pairs first.
word_pairs <- pairwise_count(tidy_df, word, id, sort = TRUE)
word_pairs

看出在每一則評論中,最常一起出現的兩個字

也可以查看某一單字最常和誰一起出現

# Pairs whose first item is "pool".
filter(word_pairs, item1 == "pool")

pairwise correlation

# Pairwise correlation between words across reviews (id), restricted to
# words that occur at least 20 times; strongest correlations first.
word_cors <- tidy_df %>%
  group_by(word) %>%
  filter(n() >= 20) %>%
  pairwise_cor(word, id, sort = TRUE)
word_cors

查看moana這個單字最常和誰一起出現

# Correlations whose first item is "moana".
filter(word_cors, item1 == "moana")

以長條圖排序 並排四個字和其他字的correlation

# For four words of interest, plot the six most correlated partner words,
# one facet per word. top_n() is given its ranking column explicitly
# (it previously fell back to the last column), and geom_col() replaces the
# equivalent geom_bar(stat = "identity").
word_cors %>%
  filter(item1 %in% c("moana", "louis", "waikiki", "shopping")) %>%
  group_by(item1) %>%
  top_n(6, correlation) %>%
  ungroup() %>%
  mutate(item2 = reorder(item2, correlation)) %>%
  ggplot(aes(item2, correlation)) +
  geom_col() +
  facet_wrap(~ item1, scales = "free") +
  coord_flip()
Selecting by correlation

# Seed the RNG before the "fr" layout is computed.
set.seed(2016)
# Network of the word pairs whose correlation exceeds 0.45; edge transparency
# follows the correlation and labels are repelled to reduce overlap.
ggraph(
  graph_from_data_frame(filter(word_cors, correlation > 0.45)),
  layout = "fr"
) +
  geom_edge_link(mapping = aes(edge_alpha = correlation), show.legend = FALSE) +
  geom_node_point(color = "lightblue", size = 5) +
  geom_node_text(mapping = aes(label = name), repel = TRUE) +
  theme_void()

Ch5.Converting to and from non-tidy formats

Casting tidy text data into a matrix

(不套用acq與stock)

以loughran將情緒分成六種

# Five most frequent review words in each Loughran sentiment category,
# drawn as horizontal bars with one facet per category.
inner_join(
  count(tidy_df, word),
  get_sentiments("loughran"),
  by = "word"
) %>%
  group_by(sentiment) %>%
  top_n(5, n) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(mapping = aes(x = word, y = n)) +
  geom_col() +
  facet_wrap(~ sentiment, scales = "free") +
  ylab("Frequency of this word in the reviews") +
  coord_flip()

Ch6.Topic modeling

LDA on reviews

# Build the document-term matrix that LDA() consumes: one row per review id,
# one column per word, cells holding the word counts. df_dtm is referenced
# here but is not created anywhere earlier in this script.
df_dtm <- tidy_df %>%
  count(id, word) %>%
  cast_dtm(id, word, n)
# Fit a four-topic LDA model with a fixed seed.
df_lda <- LDA(df_dtm, k = 4, control = list(seed = 1234))
df_lda
A LDA_VEM topic model with 4 topics.

per-topic-per-word probability

# Tidy the fitted model into one row per (topic, term) with its beta value.
df_topics <- tidy(df_lda, matrix = "beta")
df_topics
# Seven highest-beta terms of every topic, ordered by topic and then by
# descending beta.
top_terms <- group_by(df_topics, topic) %>%
  top_n(7, beta) %>%
  ungroup() %>%
  arrange(topic, desc(beta))
top_terms
# One facet per topic showing those terms as horizontal bars.
top_terms %>%
  mutate(term = reorder(term, beta)) %>%
  ggplot(mapping = aes(x = term, y = beta, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  coord_flip()

感覺看不太出來review各四個主題有什麼明顯差別

Per-document classification

(因為評論原本就沒有分類,所以就沒有跑這章)

By word assignments: augment

# Augment the document-term data with the fitted LDA model's per-word
# output, then print the result.
assignments <- augment(df_lda, data = df_dtm)
assignments

(因為評論原本就沒有分類,所以沒有測試主題有沒有分類錯誤)

LS0tCnRpdGxlOiAiSGlsdG9uIEhhd2FpaWFuIFZpbGxhZ2UgVHJpcGFkdmlzb3IgcmV2aWV3IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgrkvb/nlKhUZXh0IE1pbmluZyB3aXRoIFIgLSBBIFRpZHkgQXBwcm9hY2ggY2gxfmNoNiDliIbmnpBIaWx0b24gSGF3YWlpYW4gVmlsbGFnZSBUcmlwYWR2aXNvciByZXZpZXc8YnI+CgpEYXRh5L6G5rqQOmh0dHBzOi8vZ2l0aHViLmNvbS9zdXNhbmxpMjAxNi9EYXRhLUFuYWx5c2lzLXdpdGgtUi9ibG9iL21hc3Rlci9IaWx0b25fSGF3YWlpYW5fVmlsbGFnZV9XYWlraWtpX0JlYWNoX1Jlc29ydC1Ib25vbHVsdV9PYWh1X0hhd2FpaV9fZW4uY3N2CgoKCiMjIENoMS5UaGUgdGlkeSB0ZXh0IGZvcm1hdAoKYGBge3J9CgpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHJlYWRyKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShzdHJpbmdyKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KHNjYWxlcykKbGlicmFyeShicm9vbSkKbGlicmFyeShwdXJycikKbGlicmFyeSh3aWR5cikKbGlicmFyeShpZ3JhcGgpCmxpYnJhcnkoZ2dyYXBoKQpsaWJyYXJ5KFNub3diYWxsQykKbGlicmFyeSh3b3JkY2xvdWQpCmxpYnJhcnkocmVzaGFwZTIpCmxpYnJhcnkodG9waWNtb2RlbHMpCnRoZW1lX3NldCh0aGVtZV9taW5pbWFsKCkpCgpkZiA8LSByZWFkX2NzdigiSGlsdG9uX0hhd2FpaWFuX1ZpbGxhZ2VfV2Fpa2lraV9CZWFjaF9SZXNvcnQtSG9ub2x1bHVfT2FodV9IYXdhaWlfX2VuLmNzdiIpCmRmIDwtIGRmW2NvbXBsZXRlLmNhc2VzKGRmKSwgXQpkZiRpZCA8LSBjKDE6bnJvdyhkZikpCmRmJHJldmlld19kYXRlIDwtIGFzLkRhdGUoZGYkcmV2aWV3X2RhdGUsIGZvcm1hdCA9ICIlZC0lQi0leSIpCmRpbShkZik7IG1pbihkZiRyZXZpZXdfZGF0ZSk7IG1heChkZiRyZXZpZXdfZGF0ZSkKZGYkcmV2aWV3X2JvZHkgPSBnc3ViKCJbWzpkaWdpdDpdXSIsICIiLCBkZiRyZXZpZXdfYm9keSkgI+WOu+mZpOaVuOWtlyDpgb/lhY3lvozpnaLpl5zoga/lnJblh7rnj77lvojlpJrmlbjlrZcKCnRpZHlfZGYgPC0gZGYgJT4lCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCByZXZpZXdfYm9keSkKCmBgYAoKCgojIyMgCgpgYGB7cn0KZGF0YShzdG9wX3dvcmRzKQoKdGlkeV9kZiA8LSB0aWR5X2RmICU+JQogIGFudGlfam9pbihzdG9wX3dvcmRzKQpgYGAKCuWFqOmDqOipleirluS4reeahOWtl+mguwpgYGB7cn0KdGlkeV9kZiAlPiUKICBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkgCmBgYAoKCgpgYGB7cn0KdGlkeV9kZiAlPiUKICBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkgJT4lCiAgZmlsdGVyKG4gPiA1NTAwKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbikpICsKICBnZW9tX2NvbCgpICsKICB4bGFiKE5VTEwpICsKICBjb29yZF9mbGlwKCkKYGBgCgoKCiMjIENoMi5T
ZW50aW1lbnQgYW5hbHlzaXMKCuS9v+eUqOWtl+WFuG5yYyDmn6XnnIvlnKjoqZXoq5bkuK3lh7rnj77lk6rkuptqb3nnmoTlrZcKYGBge3J9CgpucmNfam95IDwtIGdldF9zZW50aW1lbnRzKCJucmMiKSAlPiUgCiAgZmlsdGVyKHNlbnRpbWVudCA9PSAiam95IikKCnRpZHlfZGYgJT4lCiAgaW5uZXJfam9pbihucmNfam95KSAlPiUKICBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkgI+azqOaEj+eahOm7nuaYr2JlYWNoLGZvb2QsZGlhbW9uZC4uLuS5n+WcqGpveeijoQpgYGAKCuWPr+S7peeci+WHuuS4u+imgeaYr+mXnOaWvOmjr+W6l+eahOaVtOa9lGNsZWFuLOWPi+WWhGZyaWVuZGx5LGhlbHBmdWws55Kw5aKD5oiW5bu656+J55qE576O6bqXcHJldHR5Li4uLuetiTxicj4KCgrljp90aWR5dGV4dOaWh+eroOevhOS+i+aYr+S7peWwj+iqquevh+eroOe3qOiZn+eCunjou7jvvIx56Lu454K65oOF57eS5YiG5pW45L6G5YGa5ZyW77yM6KGo54++5bCP6Kqq5oOF57eS5L6d5pOa5YqH5oOF5ryU6YCy6ICM55Si55Sf55qE6K6K5YyWPGJyPgrkuI3pgY5ob3RlbCByZXZpZXflsLHopoHorormiJDku6V46Lu454K65pel5pyf5LqGCgpgYGB7cn0KI+aXpeacn+W+njIwMDItMDMtMjF+MjAxOC0wOC0wMgpzdW1tYXJ5KHRpZHlfZGYkcmV2aWV3X2RhdGUpIApgYGAKCmBgYHtyfQpzZW50aW1lbnQgPC0gdGlkeV9kZiAlPiUKICBpbm5lcl9qb2luKGdldF9zZW50aW1lbnRzKCJiaW5nIikpICU+JQogIGNvdW50KGlkLHNlbnRpbWVudCxyZXZpZXdfZGF0ZSkgJT4lIAogIHNwcmVhZChzZW50aW1lbnQsIG4sIGZpbGwgPSAwKSAlPiUKICBtdXRhdGUoc2VudGltZW50ID0gcG9zaXRpdmUgLSBuZWdhdGl2ZSkKCiNjb2wgc2VudGltZW5054K657i95YiG5pW4CgpgYGAKCgpgYGB7cn0Kc2VuX2J5eWVhciA8LSBzZW50aW1lbnQgICU+JSBncm91cF9ieSh5ZWFyKHJldmlld19kYXRlKSkgJT4lIAogIHN1bW1hcmlzZSgKICBzZW50aW1lbnQgPSBtZWFuKHNlbnRpbWVudCkpJT4lIAogIGFzLmRhdGEuZnJhbWUoKQpgYGAKCgoKYGBge3J9CnNlbl9ieXllYXIkYHllYXIocmV2aWV3X2RhdGUpYCA9IGFzLmNoYXJhY3RlcihzZW5fYnl5ZWFyJGB5ZWFyKHJldmlld19kYXRlKWApCnNlbl9ieXllYXIkYHllYXIocmV2aWV3X2RhdGUpYCA8LSBhcy5EYXRlKHNlbl9ieXllYXIkYHllYXIocmV2aWV3X2RhdGUpYCxmb3JtYXQgPSAiJVkiKQpgYGAKCgpgYGB7cn0KZ2dwbG90KHNlbl9ieXllYXIsIGFlcyhgeWVhcihyZXZpZXdfZGF0ZSlgLCBzZW50aW1lbnQpKSArCiAgZ2VvbV9saW5lKCkrCiAgc2NhbGVfeF9kYXRlKGRhdGVfYnJlYWtzID0gIjEgeWVhciIsIGRhdGVfbGFiZWxzID0gIiVZIikKYGBgCgrpoa/npLroqZXoq5blubPlnYfmg4Xnt5LliIbmlbjlnKgyMDA25bm05pmC5pyJ5LiL6ZmN6Lao5Yui77yM5Zyo5pyA6L+R55qEMjAxOOeUmuiHs+aYr+acgOS9jum7ngoKCgojIyMgbW9zdCBjb21tb24gcG9zaXRpdmUgYW5kIG5lZ2F0aXZlIHdvcmRzCgoKYGBge3J9
CmJpbmdfd29yZF9jb3VudHMgPC0gdGlkeV9kZiAlPiUKICBpbm5lcl9qb2luKGdldF9zZW50aW1lbnRzKCJiaW5nIikpICU+JQogIGNvdW50KHdvcmQsIHNlbnRpbWVudCwgc29ydCA9IFRSVUUpICU+JQogIHVuZ3JvdXAoKQoKYmluZ193b3JkX2NvdW50cwpgYGAKCgpgYGB7cn0KYmluZ193b3JkX2NvdW50cyAlPiUKICBncm91cF9ieShzZW50aW1lbnQpICU+JQogIHRvcF9uKDEwKSAlPiUKICB1bmdyb3VwKCkgJT4lCiAgbXV0YXRlKHdvcmQgPSByZW9yZGVyKHdvcmQsIG4pKSAlPiUKICBnZ3Bsb3QoYWVzKHdvcmQsIG4sIGZpbGwgPSBzZW50aW1lbnQpKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGZhY2V0X3dyYXAofnNlbnRpbWVudCwgc2NhbGVzID0gImZyZWVfeSIpICsKICBsYWJzKHkgPSAiQ29udHJpYnV0aW9uIHRvIHNlbnRpbWVudCIsCiAgICAgICB4ID0gTlVMTCkgKwogIGNvb3JkX2ZsaXAoKQpgYGAKCiMjIyB3b3JkIGNsb3VkCgpgYGB7cn0KdGlkeV9kZiAlPiUKICBhbnRpX2pvaW4oc3RvcF93b3JkcykgJT4lCiAgY291bnQod29yZCkgJT4lCiAgd2l0aCh3b3JkY2xvdWQod29yZCwgbiwgbWF4LndvcmRzID0gMTAwKSkKYGBgCgoKYGBge3J9CnRpZHlfZGYgJT4lCiAgaW5uZXJfam9pbihnZXRfc2VudGltZW50cygiYmluZyIpKSAlPiUKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKSAlPiUKICBhY2FzdCh3b3JkIH4gc2VudGltZW50LCB2YWx1ZS52YXIgPSAibiIsIGZpbGwgPSAwKSAlPiUKICBjb21wYXJpc29uLmNsb3VkKGNvbG9ycyA9IGMoImdyYXkyMCIsICJncmF5ODAiKSwKICAgICAgICAgICAgICAgICAgIG1heC53b3JkcyA9IDEwMCkKYGBgCgojIyBDaDMuQW5hbHl6aW5nIHdvcmQgYW5kIGRvY3VtZW50IGZyZXF1ZW5jeTogdGYtaWRmCgpgYGB7cn0KI+eci+eci+acquWOu+mZpHN0b3Agd29yZHPliY3nmoR0ZXJtIGZyZXF1ZW5jeQpkZl93b3JkcyA8LSBkZiAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIHJldmlld19ib2R5KSAlPiUKICBjb3VudChpZCwgd29yZCwgc29ydCA9IFRSVUUpICU+JQogIHVuZ3JvdXAoKQoKdG90YWxfd29yZHMgPC0gZGZfd29yZHMgJT4lIAogIGdyb3VwX2J5KGlkKSAlPiUgCiAgc3VtbWFyaXplKHRvdGFsID0gc3VtKG4pKQoKYm9va193b3JkcyA8LSBsZWZ0X2pvaW4oZGZfd29yZHMsIHRvdGFsX3dvcmRzKQoKYm9va193b3JkcwpgYGAKCm7mmK93b3Jk6Kme6aC7LHRvdGFs5pivaWQgcmV2aWV355qE57i95a2X5pW4CgoKIyMjIFppcGYncyBsYXcKCmBgYHtyfQpmcmVxX2J5X3JhbmsgPC0gYm9va193b3JkcyAlPiUgCiAgZ3JvdXBfYnkoaWQpICU+JSAKICBtdXRhdGUocmFuayA9IHJvd19udW1iZXIoKSwgCiAgICAgICAgIGB0ZXJtIGZyZXF1ZW5jeWAgPSBuL3RvdGFsKQoKZnJlcV9ieV9yYW5rCmBgYAoK5Lul5LiA56+H6KmV6KuW55W25L2c5LiA56+H5paH5pys5L6G55yL77yM5bi46KaL55qEc3RvcCB3b3Jkc+W5vuS5jumDveaYr3Jh
bmvliY3lub7lkI0o55u46LyD5YW25LuW5a2X6Kme77yM5Ye654++6aC7546H55qEcmFuaykKCgoKYGBge3J9CnJhbmtfc3Vic2V0IDwtIGZyZXFfYnlfcmFuayAlPiUgCiAgZmlsdGVyKHJhbmsgPCA1MDAsCiAgICAgICAgIHJhbmsgPiAxMCkKCmxtKGxvZzEwKGB0ZXJtIGZyZXF1ZW5jeWApIH4gbG9nMTAocmFuayksIGRhdGEgPSByYW5rX3N1YnNldCkKYGBgCgoKCiMjIyBiaW5kIHRmLWlkZiBmdW5jdGluCgpgYGB7cn0KYm9va193b3JkcyA8LSBib29rX3dvcmRzICU+JQogIGJpbmRfdGZfaWRmKHdvcmQsaWQsIG4pCmJvb2tfd29yZHMKYGBgCgpgYGB7cn0KYm9va193b3JkcyAlPiUKICAjc2VsZWN0KC10b3RhbCkgJT4lCiAgYXJyYW5nZShkZXNjKHRmX2lkZikpCmBgYAoKCgpgYGB7cn0KYm9va193b3JkcyAlPiUKICBhcnJhbmdlKGRlc2ModGZfaWRmKSkgJT4lCiAgbXV0YXRlKHdvcmQgPSBmYWN0b3Iod29yZCwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkKSkpKSAlPiUgCiAgdG9wX24oMTUpICU+JSAKICB1bmdyb3VwICU+JQogIGdncGxvdChhZXMod29yZCwgdGZfaWRmKSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gInRmLWlkZiIpICsKICBjb29yZF9mbGlwKCkKYGBgCgoKIyMgQ2g0LlJlbGF0aW9uc2hpcHMgYmV0d2VlbiB3b3Jkczogbi1ncmFtcyBhbmQgY29ycmVsYXRpb25zCgpgYGB7cn0KZGZfYmlncmFtcyA8LSBkZiAlPiUKICB1bm5lc3RfdG9rZW5zKGJpZ3JhbSwgcmV2aWV3X2JvZHksIHRva2VuID0gIm5ncmFtcyIsIG4gPSAyKQoKZGZfYmlncmFtcwpgYGAKCmBgYHtyfQpkZl9iaWdyYW1zICU+JQogIGNvdW50KGJpZ3JhbSwgc29ydCA9IFRSVUUpCmBgYAoKYGBge3J9CmJpZ3JhbXNfc2VwYXJhdGVkIDwtIGRmX2JpZ3JhbXMgJT4lCiAgc2VwYXJhdGUoYmlncmFtLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpCgpiaWdyYW1zX2ZpbHRlcmVkIDwtIGJpZ3JhbXNfc2VwYXJhdGVkICU+JQogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpCgojIG5ldyBiaWdyYW0gY291bnRzOgpiaWdyYW1fY291bnRzIDwtIGJpZ3JhbXNfZmlsdGVyZWQgJT4lIAogIGNvdW50KHdvcmQxLCB3b3JkMiwgc29ydCA9IFRSVUUpCgpiaWdyYW1fY291bnRzCmBgYAoKCmBgYHtyfQpiaWdyYW1zX3VuaXRlZCA8LSBiaWdyYW1zX2ZpbHRlcmVkICU+JQogIHVuaXRlKGJpZ3JhbSwgd29yZDEsIHdvcmQyLCBzZXAgPSAiICIpCgpiaWdyYW1zX3VuaXRlZApgYGAKCmBgYHtyfQpkZiAlPiUKICB1bm5lc3RfdG9rZW5zKHRyaWdyYW0scmV2aWV3X2JvZHksIHRva2VuID0gIm5ncmFtcyIsIG4gPSAzKSAlPiUKICBzZXBhcmF0ZSh0cmlncmFtLCBjKCJ3b3JkMSIsICJ3b3JkMiIsICJ3b3JkMyIpLCBzZXAgPSAiICIpICU+JQogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQsCiAg
ICAgICAgICF3b3JkMiAlaW4lIHN0b3Bfd29yZHMkd29yZCwKICAgICAgICAgIXdvcmQzICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUKICBjb3VudCh3b3JkMSwgd29yZDIsIHdvcmQzLCBzb3J0ID0gVFJVRSkKYGBgCgpgYGB7cn0KYmlncmFtX3RmX2lkZiA8LSBiaWdyYW1zX3VuaXRlZCAlPiUKICBjb3VudChpZCwgYmlncmFtKSAlPiUKICBiaW5kX3RmX2lkZihiaWdyYW0sIGlkLCBuKSAlPiUKICBhcnJhbmdlKGRlc2ModGZfaWRmKSkKCmJpZ3JhbV90Zl9pZGYKYGBgCgrmjpLlnKhub3TlvoznmoTlrZfoqZ4gCgpgYGB7cn0KQUZJTk4gPC0gZ2V0X3NlbnRpbWVudHMoImFmaW5uIikKCm5vdF93b3JkcyA8LSBiaWdyYW1zX3NlcGFyYXRlZCAlPiUKICBmaWx0ZXIod29yZDEgPT0gIm5vdCIpICU+JQogIGlubmVyX2pvaW4oQUZJTk4sIGJ5ID0gYyh3b3JkMiA9ICJ3b3JkIikpICU+JQogIGNvdW50KHdvcmQyLCBzY29yZSwgc29ydCA9IFRSVUUpICU+JQogIHVuZ3JvdXAoKQoKbm90X3dvcmRzCmBgYAoKCmBgYHtyfQpub3Rfd29yZHMgJT4lCiAgbXV0YXRlKGNvbnRyaWJ1dGlvbiA9IG4gKiBzY29yZSkgJT4lCiAgYXJyYW5nZShkZXNjKGFicyhjb250cmlidXRpb24pKSkgJT4lCiAgaGVhZCgyMCkgJT4lCiAgbXV0YXRlKHdvcmQyID0gcmVvcmRlcih3b3JkMiwgY29udHJpYnV0aW9uKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkMiwgbiAqIHNjb3JlLCBmaWxsID0gbiAqIHNjb3JlID4gMCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgeGxhYigiV29yZHMgcHJlY2VkZWQgYnkgXCJub3RcIiIpICsKICB5bGFiKCJTZW50aW1lbnQgc2NvcmUgKiBudW1iZXIgb2Ygb2NjdXJyZW5jZXMiKSArCiAgY29vcmRfZmxpcCgpCmBgYAoK5LiN5Y+qbm905Luj6KGo5ZCm5a6a77yM5Yqg5YWl5YW25LuW5ZCm5a6a5a2X6KmeCmBgYHtyfQpuZWdhdGlvbl93b3JkcyA8LSBjKCJub3QiLCAibm8iLCAibmV2ZXIiLCAid2l0aG91dCIpCgpuZWdhdGVkX3dvcmRzIDwtIGJpZ3JhbXNfc2VwYXJhdGVkICU+JQogIGZpbHRlcih3b3JkMSAlaW4lIG5lZ2F0aW9uX3dvcmRzKSAlPiUKICBpbm5lcl9qb2luKEFGSU5OLCBieSA9IGMod29yZDIgPSAid29yZCIpKSAlPiUKICBjb3VudCh3b3JkMSwgd29yZDIsIHNjb3JlLCBzb3J0ID0gVFJVRSkgJT4lCiAgdW5ncm91cCgpCmBgYAoKYGBge3J9Cm5lZ2F0ZWRfd29yZHMgJT4lCiAgbXV0YXRlKGNvbnRyaWJ1dGlvbiA9IG4gKiBzY29yZSkgJT4lCiAgYXJyYW5nZShkZXNjKGFicyhjb250cmlidXRpb24pKSkgJT4lCiAgaGVhZCgyMCkgJT4lCiAgbXV0YXRlKHdvcmQyID0gcmVvcmRlcih3b3JkMiwgY29udHJpYnV0aW9uKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkMiwgbiAqIHNjb3JlLCBmaWxsID0gbiAqIHNjb3JlID4gMCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh+d29yZDEsIHNjYWxlcyA9ICJmcmVlX3kiKSArICAKICB4bGFiKCJXb3JkcyBwcmVj
ZWRlZCBieSBcIm5lZ2F0ZWQgd29yZHNcIiIpICsKICB5bGFiKCJTZW50aW1lbnQgc2NvcmUgKiBudW1iZXIgb2Ygb2NjdXJyZW5jZXMiKSArCiAgY29vcmRfZmxpcCgpCmBgYAoKCmBgYHtyfQojc2F2ZS5pbWFnZSgidGlkeXRleHRfaGF3YWkuUkRhdGEiKQpgYGAKCgojIyMgVmlzdWFsaXppbmcgYSBuZXR3b3JrIG9mIGJpZ3JhbXMgd2l0aCBnZ3JhcGgKCmBgYHtyfQpiaWdyYW1fZ3JhcGggPC0gYmlncmFtX2NvdW50cyAlPiUKICBmaWx0ZXIobiA+IDkwKSAlPiUKICBncmFwaF9mcm9tX2RhdGFfZnJhbWUoKQoKYmlncmFtX2dyYXBoCmBgYAoKCmBgYHtyfQpsaWJyYXJ5KGdncmFwaCkKc2V0LnNlZWQoMjAxNykKCiMg5YmN6JmV55CG6YKE6aCI5oqK5pW45a2X5Y675o6JCmdncmFwaChiaWdyYW1fZ3JhcGgsIGxheW91dCA9ICJmciIpICsgCiAgZ2VvbV9lZGdlX2xpbmsoKSArCiAgZ2VvbV9ub2RlX3BvaW50KCkgKwogIGdlb21fbm9kZV90ZXh0KGFlcyhsYWJlbCA9IG5hbWUpLCB2anVzdCA9IDEsIGhqdXN0ID0gMSkKYGBgCgpgYGB7cn0Kc2V0LnNlZWQoMjAxNikKCmEgPC0gZ3JpZDo6YXJyb3codHlwZSA9ICJjbG9zZWQiLCBsZW5ndGggPSB1bml0KC4xNSwgImluY2hlcyIpKQoKZ2dyYXBoKGJpZ3JhbV9ncmFwaCwgbGF5b3V0ID0gImZyIikgKwogIGdlb21fZWRnZV9saW5rKGFlcyhlZGdlX2FscGhhID0gbiksIHNob3cubGVnZW5kID0gRkFMU0UsCiAgICAgICAgICAgICAgICAgYXJyb3cgPSBhLCBlbmRfY2FwID0gY2lyY2xlKC4wNywgJ2luY2hlcycpKSArCiAgZ2VvbV9ub2RlX3BvaW50KGNvbG9yID0gImxpZ2h0Ymx1ZSIsIHNpemUgPSA1KSArCiAgZ2VvbV9ub2RlX3RleHQoYWVzKGxhYmVsID0gbmFtZSksIHZqdXN0ID0gMSwgaGp1c3QgPSAxKSArCiAgdGhlbWVfdm9pZCgpCmBgYAoKCgojIyMgQ291bnRpbmcgYW5kIGNvcnJlbGF0aW5nIGFtb25nIHJldmlld3MKCmBgYHtyfQpsaWJyYXJ5KHdpZHlyKQoKIyBjb3VudCB3b3JkcyBjby1vY2N1cmluZyB3aXRoaW4gc2VjdGlvbnMKd29yZF9wYWlycyA8LSB0aWR5X2RmICU+JQogIHBhaXJ3aXNlX2NvdW50KHdvcmQsIGlkLCBzb3J0ID0gVFJVRSkKCndvcmRfcGFpcnMKYGBgCgrnnIvlh7rlnKjmr4/kuIDliYfoqZXoq5bkuK3vvIzmnIDluLjkuIDotbflh7rnj77nmoTlhanlgIvlrZc8YnI+CgoK5Lmf5Y+v5Lul5p+l55yL5p+Q5LiA5Zau5a2X5pyA5bi45ZKM6Kqw5LiA6LW35Ye654++CgpgYGB7cn0Kd29yZF9wYWlycyAlPiUKICBmaWx0ZXIoaXRlbTEgPT0gInBvb2wiKQpgYGAKCiMjIyBwYWlyd2lzZSBjb3JyZWxhdGlvbgoKYGBge3J9CndvcmRfY29ycyA8LSB0aWR5X2RmICU+JQogIGdyb3VwX2J5KHdvcmQpICU+JQogIGZpbHRlcihuKCkgPj0gMjApICU+JQogIHBhaXJ3aXNlX2Nvcih3b3JkLCBpZCwgc29ydCA9IFRSVUUpCgp3b3JkX2NvcnMKYGBgCgoK5p+l55yLbW9hbmHpgJnlgIvllq7lrZfmnIDluLjlkozoqrDkuIDotbflh7rnj74KYGBge3J9CndvcmRfY29ycyAlPiUK
ICBmaWx0ZXIoaXRlbTEgPT0gIm1vYW5hIikKYGBgCgoK5Lul6ZW35qKd5ZyW5o6S5bqPIOS4puaOkuWbm+WAi+Wtl+WSjOWFtuS7luWtl+eahGNvcnJlbGF0aW9uCmBgYHtyfQp3b3JkX2NvcnMgJT4lCiAgZmlsdGVyKGl0ZW0xICVpbiUgYygibW9hbmEiLCAibG91aXMiLCAid2Fpa2lraSIsICJzaG9wcGluZyIpKSAlPiUKICBncm91cF9ieShpdGVtMSkgJT4lCiAgdG9wX24oNikgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZShpdGVtMiA9IHJlb3JkZXIoaXRlbTIsIGNvcnJlbGF0aW9uKSkgJT4lCiAgZ2dwbG90KGFlcyhpdGVtMiwgY29ycmVsYXRpb24pKSArCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpICsKICBmYWNldF93cmFwKH4gaXRlbTEsIHNjYWxlcyA9ICJmcmVlIikgKwogIGNvb3JkX2ZsaXAoKQpgYGAKCgpgYGB7cn0Kc2V0LnNlZWQoMjAxNikKCndvcmRfY29ycyAlPiUKICBmaWx0ZXIoY29ycmVsYXRpb24gPiAuNDUpICU+JQogIGdyYXBoX2Zyb21fZGF0YV9mcmFtZSgpICU+JQogIGdncmFwaChsYXlvdXQgPSAiZnIiKSArCiAgZ2VvbV9lZGdlX2xpbmsoYWVzKGVkZ2VfYWxwaGEgPSBjb3JyZWxhdGlvbiksIHNob3cubGVnZW5kID0gRkFMU0UpICsKICBnZW9tX25vZGVfcG9pbnQoY29sb3IgPSAibGlnaHRibHVlIiwgc2l6ZSA9IDUpICsKICBnZW9tX25vZGVfdGV4dChhZXMobGFiZWwgPSBuYW1lKSwgcmVwZWwgPSBUUlVFKSArCiAgdGhlbWVfdm9pZCgpCmBgYAoKCiMjIENoNS5Db252ZXJ0aW5nIHRvIGFuZCBmcm9tIG5vbi10aWR5IGZvcm1hdHMKCiMjIyBDYXN0aW5nIHRpZHkgdGV4dCBkYXRhIGludG8gYSBtYXRyaXgKCmBgYHtyfQpkZl9kdG0gPC0gdGlkeV9kZiAlPiUKICBjb3VudChpZCwgd29yZCkgJT4lCiAgY2FzdF9kdG0oaWQsIHdvcmQsIG4pCgpkZl9kdG0KYGBgCgoo5LiN5aWX55SoYWNx6IiHc3RvY2spCgoKIyMjIyDku6Vsb3VnaHJhbuWwh+aDhee3kuWIhuaIkOWFreeorgoKYGBge3J9CnRpZHlfZGYgJT4lCiAgY291bnQod29yZCkgJT4lCiAgaW5uZXJfam9pbihnZXRfc2VudGltZW50cygibG91Z2hyYW4iKSwgYnkgPSAid29yZCIpICU+JQogIGdyb3VwX2J5KHNlbnRpbWVudCkgJT4lCiAgdG9wX24oNSwgbikgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkLCBuKSkgKwogIGdlb21fY29sKCkgKwogIGNvb3JkX2ZsaXAoKSArCiAgZmFjZXRfd3JhcCh+IHNlbnRpbWVudCwgc2NhbGVzID0gImZyZWUiKSArCiAgeWxhYigiRnJlcXVlbmN5IG9mIHRoaXMgd29yZCBpbiB0aGUgcmV2aWV3cyIpCmBgYAoKCiMjIENoNi5Ub3BpYyBtb2RlbGluZwoKCiMjIyBMREEgb24gcmV2aWV3cwoKYGBge3J9CmRmX2xkYSA8LSBMREEoZGZfZHRtLCBrID0gNCwgY29udHJvbCA9IGxpc3Qoc2VlZCA9IDEyMzQpKQpkZl9sZGEKYGBgCgpwZXItdG9waWNjLXBlci13b3JkIHByb2JhYmlsaXR5CgpgYGB7cn0KZGZfdG9waWNzIDwt
IHRpZHkoZGZfbGRhLCBtYXRyaXggPSAiYmV0YSIpCmRmX3RvcGljcwpgYGAKCgpgYGB7cn0KdG9wX3Rlcm1zIDwtIGRmX3RvcGljcyAlPiUKICBncm91cF9ieSh0b3BpYykgJT4lCiAgdG9wX24oNywgYmV0YSkgJT4lCiAgdW5ncm91cCgpICU+JQogIGFycmFuZ2UodG9waWMsIC1iZXRhKQoKdG9wX3Rlcm1zCmBgYAoKYGBge3J9CnRvcF90ZXJtcyAlPiUKICBtdXRhdGUodGVybSA9IHJlb3JkZXIodGVybSwgYmV0YSkpICU+JQogIGdncGxvdChhZXModGVybSwgYmV0YSwgZmlsbCA9IGZhY3Rvcih0b3BpYykpKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGZhY2V0X3dyYXAofiB0b3BpYywgc2NhbGVzID0gImZyZWUiKSArCiAgY29vcmRfZmxpcCgpCmBgYAoKCuaEn+imuueci+S4jeWkquWHuuS+hnJldmlld+WQhOWbm+WAi+S4u+mhjOacieS7gOm6vOaYjumhr+W3ruWIpQoKCiMjIyBQZXItZG9jdW1lbnQgY2xhc3NpZmljYXRpb24KCijlm6DngrroqZXoq5bljp/mnKzlsLHmspLmnInliIbpoZ7vvIzmiYDku6XlsLHmspLmnInot5HpgJnnq6ApCgoKIyMjIEJ5IHdvcmQgYXNzaWdubWVudHM6IGF1Z21lbnQKCmBgYHtyfQphc3NpZ25tZW50cyA8LSBhdWdtZW50KGRmX2xkYSwgZGF0YSA9IGRmX2R0bSkKYXNzaWdubWVudHMKYGBgCgoo5Zug54K66KmV6KuW5Y6f5pys5bCx5rKS5pyJ5YiG6aGe77yM5omA5Lul5rKS5pyJ5ris6Kmm5Li76aGM5pyJ5rKS5pyJ5YiG6aGe6Yyv6KqkKQo=