使用Text Mining with R - A Tidy Approach ch1~ch6 分析Hilton Hawaiian Village Tripadvisor review
Ch1.The tidy text format
library(dplyr)
library(readr)
library(lubridate)
library(ggplot2)
library(tidytext)
library(tidyverse)
library(stringr)
library(tidyr)
library(scales)
library(broom)
library(purrr)
library(widyr)
library(igraph)
library(ggraph)
library(SnowballC)
library(wordcloud)
library(reshape2)
library(topicmodels)
# Use the minimal ggplot2 theme for every plot in this analysis.
theme_set(new = theme_minimal())

# Load the review CSV. The path is relative, so it is resolved against the
# current working directory.
df <- read_csv(
  file = "Hilton_Hawaiian_Village_Waikiki_Beach_Resort-Honolulu_Oahu_Hawaii__en.csv"
)
Parsed with column specification:
cols(
review_body = col_character(),
review_date = col_character()
)
# Keep only rows where every column is present, then number the remaining
# reviews so each one can act as a document id later on.
df <- df[complete.cases(df), ]
# seq_len() yields an empty vector for a zero-row frame, whereas 1:nrow(df)
# would yield c(1, 0) and make the assignment fail.
df$id <- seq_len(nrow(df))

# Dates are parsed as day - full month name - two-digit year.
# NOTE(review): %B matches month names in the current locale, so this
# presumably needs an English locale -- confirm on the target machine.
df$review_date <- as.Date(df$review_date, format = "%d-%B-%y")

# Sanity checks: size of the cleaned data and the covered date range.
dim(df)
min(df$review_date)
max(df$review_date)
[1] 13701 3
[1] "2002-03-21"
[1] "2018-08-02"
# Strip digits from the review text so that numbers do not clutter the
# word-relationship graphs drawn later.
df$review_body <- gsub("[[:digit:]]", "", df$review_body)

# One row per (review, word) token.
tidy_df <- df %>%
  unnest_tokens(word, review_body)

# Drop the stop words shipped with tidytext. The join key is left implicit
# (the shared `word` column), so dplyr reports it in a message.
data(stop_words)
tidy_df <- tidy_df %>%
  anti_join(stop_words)
Joining, by = "word"
全部評論中的字頻
# Word frequencies over all reviews, most frequent first.
count(tidy_df, word, sort = TRUE)

# Horizontal bar chart of the words that occur more than 5500 times,
# ordered by frequency.
ggplot(
  tidy_df %>%
    count(word, sort = TRUE) %>%
    filter(n > 5500) %>%
    mutate(word = reorder(word, n)),
  aes(word, n)
) +
  geom_col() +
  xlab(NULL) +
  coord_flip()

Ch2.Sentiment analysis
使用字典nrc 查看在評論中出現哪些joy的字
# Entries of the NRC lexicon that are tagged with the "joy" sentiment.
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Joy words that occur in the reviews, most frequent first.
# Note that beach, food, diamond, ... are also tagged as joy.
tidy_df %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)
Joining, by = "word"
可以看出主要是關於飯店的整潔clean,友善friendly,helpful,環境或建築的美麗pretty….等
原tidytext文章範例是以小說篇章編號為x軸,y軸為情緒分數來做圖,表現小說情緒依據劇情演進而產生的變化
不過hotel review就要變成以x軸為日期了
# Review dates run from 2002-03-21 to 2018-08-02.
summary(tidy_df[["review_date"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2002-03-21" "2012-01-05" "2014-04-28" "2013-08-26" "2016-03-14" "2018-08-02"
# Per review: number of positive and negative Bing words (0 when a class is
# absent) and the net score, positive minus negative.
sentiment <- inner_join(tidy_df, get_sentiments("bing")) %>%
  count(id, sentiment, review_date) %>%
  spread(key = sentiment, value = n, fill = 0) %>%
  mutate(sentiment = positive - negative)
Joining, by = "word"
# The `sentiment` column holds each review's net score; average it per
# calendar year of the review date.
sen_byyear <- sentiment %>%
  group_by(year(review_date)) %>%
  summarise(sentiment = mean(sentiment)) %>%
  as.data.frame()

# Turn the numeric year into a Date anchored at January 1st.
# as.Date(x, format = "%Y") would instead fill the missing month and day from
# the current date, so the plotted positions would depend on the day the
# script is run (and become NA for non-leap years when run on Feb 29).
sen_byyear$`year(review_date)` <- as.Date(
  paste0(sen_byyear$`year(review_date)`, "-01-01")
)

# Yearly mean sentiment as a line, with one axis label per year.
ggplot(sen_byyear, aes(`year(review_date)`, sentiment)) +
  geom_line() +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")

顯示評論平均情緒分數在2006年時有下降趨勢,在最近的2018甚至是最低點
most common positive and negative words
# How often each Bing sentiment word occurs across all reviews, together with
# its polarity, most frequent first.
bing_word_counts <- inner_join(tidy_df, get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()
Joining, by = "word"
bing_word_counts

# Top 10 words per sentiment (top_n() picks the last column, `n`, as the
# ranking variable), drawn as horizontal bars with one facet per sentiment.
ggplot(
  bing_word_counts %>%
    group_by(sentiment) %>%
    top_n(10) %>%
    ungroup() %>%
    mutate(word = reorder(word, n)),
  aes(word, n, fill = sentiment)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(x = NULL, y = "Contribution to sentiment") +
  coord_flip()
Selecting by n

word cloud
# Word cloud limited to 100 words, sized by frequency.
# NOTE(review): stop words were already removed when tidy_df was built, so
# this anti_join looks redundant -- kept to preserve the original behaviour.
with(
  count(anti_join(tidy_df, stop_words), word),
  wordcloud(word, n, max.words = 100)
)
Joining, by = "word"

# Comparison cloud of Bing sentiment words: the counts are cast into a
# word x sentiment matrix (0 where a word lacks a sentiment) and plotted with
# one grey shade per sentiment column.
comparison.cloud(
  tidy_df %>%
    inner_join(get_sentiments("bing")) %>%
    count(word, sentiment, sort = TRUE) %>%
    acast(word ~ sentiment, value.var = "n", fill = 0),
  colors = c("gray20", "gray80"),
  max.words = 100
)
Joining, by = "word"

Ch3.Analyzing word and document frequency: tf-idf
# Term frequency BEFORE stop words are removed: count every word per review.
df_words <- ungroup(
  df %>%
    unnest_tokens(word, review_body) %>%
    count(id, word, sort = TRUE)
)

# Total number of words in each review.
total_words <- summarize(group_by(df_words, id), total = sum(n))

# Attach the review total to every (review, word) row; the join key is left
# implicit, so dplyr reports it in a message.
book_words <- df_words %>% left_join(total_words)
Joining, by = "id"
# Show the per-review word counts together with each review's total.
book_words
n 是該字在該則評論中出現的次數,total 是該則評論(id)的總字數
Zipf’s law
# Rank each word within its review and compute its share of the review.
# The rank is simply the row position inside the group, so it relies on
# book_words already being ordered by descending count.
freq_by_rank <- mutate(
  group_by(book_words, id),
  rank = row_number(),
  `term frequency` = n / total
)
freq_by_rank
以一篇評論當作一篇文本來看,常見的stop words幾乎都是rank前幾名(相較其他字詞,出現頻率的rank)
# Restrict to ranks strictly between 10 and 500 and fit the log-log
# relationship between term frequency and rank.
rank_subset <- filter(freq_by_rank, rank > 10, rank < 500)
lm(log10(`term frequency`) ~ log10(rank), data = rank_subset)
Call:
lm(formula = log10(`term frequency`) ~ log10(rank), data = rank_subset)
Coefficients:
(Intercept) log10(rank)
-0.8067 -0.8256
bind tf-idf function
# Add tf, idf and tf_idf columns, treating each review (id) as a document.
book_words <- bind_tf_idf(book_words, word, id, n)
book_words

# Rows with the highest tf-idf first (the `total` column is kept; dropping it
# via select(-total) was left disabled in the original).
arrange(book_words, desc(tf_idf))

# Bar chart of the top 15 rows by tf-idf (top_n() ranks by the last column,
# tf_idf). Factor levels are reversed so the largest value ends up on top
# after the coordinate flip.
ggplot(
  book_words %>%
    arrange(desc(tf_idf)) %>%
    mutate(word = factor(word, levels = rev(unique(word)))) %>%
    top_n(15) %>%
    ungroup(),
  aes(word, tf_idf)
) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  coord_flip()
Selecting by tf_idf

Ch4.Relationships between words: n-grams and correlations
# Tokenise every review into n-grams of length 2 (bigrams).
df_bigrams <- unnest_tokens(df, bigram, review_body, token = "ngrams", n = 2)
df_bigrams

# Most frequent bigrams, stop words still included.
count(df_bigrams, bigram, sort = TRUE)

# Split each bigram on the space into its two words so that each word can be
# tested on its own.
bigrams_separated <- separate(
  df_bigrams, bigram, c("word1", "word2"),
  sep = " "
)

# Keep only bigrams in which neither word is a stop word.
bigrams_filtered <- filter(
  bigrams_separated,
  !word1 %in% stop_words$word,
  !word2 %in% stop_words$word
)

# Bigram counts after stop-word removal.
bigram_counts <- count(bigrams_filtered, word1, word2, sort = TRUE)
bigram_counts

# Glue the filtered word pairs back into one space-separated column.
bigrams_united <- unite(bigrams_filtered, bigram, word1, word2, sep = " ")
bigrams_united

# Same procedure for trigrams: drop any trigram containing a stop word and
# count the rest.
df %>%
  unnest_tokens(trigram, review_body, token = "ngrams", n = 3) %>%
  separate(trigram, c("word1", "word2", "word3"), sep = " ") %>%
  filter(
    !(word1 %in% stop_words$word |
      word2 %in% stop_words$word |
      word3 %in% stop_words$word)
  ) %>%
  count(word1, word2, word3, sort = TRUE)

# tf-idf of the filtered bigrams with each review as a document, highest
# first.
bigram_tf_idf <- count(bigrams_united, id, bigram) %>%
  bind_tf_idf(bigram, id, n) %>%
  arrange(desc(tf_idf))
bigram_tf_idf
排在not後的字詞
# AFINN lexicon; this code reads its numeric `score` column.
AFINN <- get_sentiments("afinn")

# Scored words that directly follow "not", with how often each occurs.
not_words <- ungroup(
  bigrams_separated %>%
    filter(word1 == "not") %>%
    inner_join(AFINN, by = c(word2 = "word")) %>%
    count(word2, score, sort = TRUE)
)
not_words

# The 20 words with the largest absolute contribution (count * score), i.e.
# those that skew a word-level sentiment score the most when negated.
ggplot(
  not_words %>%
    mutate(contribution = n * score) %>%
    arrange(desc(abs(contribution))) %>%
    head(20) %>%
    mutate(word2 = reorder(word2, contribution)),
  aes(word2, contribution, fill = contribution > 0)
) +
  geom_col(show.legend = FALSE) +
  xlab("Words preceded by \"not\"") +
  ylab("Sentiment score * number of occurrences") +
  coord_flip()

不只not代表否定,加入其他否定字詞
# "not" is not the only negation; look at several negating first words.
negation_words <- c("not", "no", "never", "without")

# Scored words that directly follow any of the negation words, counted per
# (negation word, following word, score).
negated_words <- ungroup(
  bigrams_separated %>%
    filter(word1 %in% negation_words) %>%
    inner_join(AFINN, by = c(word2 = "word")) %>%
    count(word1, word2, score, sort = TRUE)
)

# The 20 rows with the largest absolute contribution overall, one facet per
# negation word.
ggplot(
  negated_words %>%
    mutate(contribution = n * score) %>%
    arrange(desc(abs(contribution))) %>%
    head(20) %>%
    mutate(word2 = reorder(word2, contribution)),
  aes(word2, contribution, fill = contribution > 0)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~word1, scales = "free_y") +
  xlab("Words preceded by \"negated words\"") +
  ylab("Sentiment score * number of occurrences") +
  coord_flip()

#save.image("tidytext_hawai.RData")
Visualizing a network of bigrams with ggraph
# Build a graph from the bigrams seen more than 90 times: word1 -> word2
# edges, with the count `n` carried along as an edge attribute.
bigram_graph <- graph_from_data_frame(filter(bigram_counts, n > 90))
bigram_graph
IGRAPH 314b2cf DN-- 215 189 --
+ attr: name (v/c), n (e/n)
+ edges from 314b2cf (vertex names):
[1] rainbow ->tower hawaiian->village hilton ->hawaiian
[4] ocean ->view diamond ->head waikiki ->beach
[7] tapa ->tower ali'i ->tower front ->desk
[10] resort ->fee walking ->distance friday ->night
[13] abc ->store ala ->moana kalia ->tower
[16] hilton ->honors ocean ->front head ->tower
[19] highly ->recommend abc ->stores super ->pool
[22] minute ->walk alii ->tower tropics ->bar
+ ... omitted several edges
library(ggraph)

# Seed the RNG before drawing so the "fr" layout can be reproduced.
set.seed(2017)

# Note from the author: digits still have to be removed during preprocessing,
# otherwise they show up as nodes in this graph.
ggraph(graph = bigram_graph, layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  geom_node_text(mapping = aes(label = name), hjust = 1, vjust = 1)

# Seed the RNG before drawing so the layout can be reproduced.
set.seed(2016)

# Closed arrow head used to show the direction word1 -> word2.
a <- grid::arrow(type = "closed", length = unit(0.15, "inches"))

# Same network, polished: edge transparency follows the bigram count, arrows
# stop just short of the node, and the plot background is removed.
ggraph(graph = bigram_graph, layout = "fr") +
  geom_edge_link(
    aes(edge_alpha = n),
    show.legend = FALSE,
    arrow = a,
    end_cap = circle(0.07, "inches")
  ) +
  geom_node_point(color = "lightblue", size = 5) +
  geom_node_text(mapping = aes(label = name), hjust = 1, vjust = 1) +
  theme_void()

Counting and correlating among reviews
library(widyr)

# Count how many reviews (id) each pair of words appears in together,
# most frequent pairs first.
word_pairs <- pairwise_count(tidy_df, word, id, sort = TRUE)
word_pairs
看出在每一則評論中,最常一起出現的兩個字
也可以查看某一單字最常和誰一起出現
# Words that co-occur with "pool".
filter(word_pairs, item1 == "pool")
pairwise correlation
# Pairwise correlation between words across reviews, restricted to words that
# occur at least 20 times, strongest correlations first.
word_cors <- tidy_df %>%
  group_by(word) %>%
  filter(n() >= 20) %>%
  pairwise_cor(item = word, feature = id, sort = TRUE)
word_cors
查看moana這個單字最常和誰一起出現
# Words correlated with "moana".
filter(word_cors, item1 == "moana")
以長條圖排序 並排四個字和其他字的correlation
# For four words of interest, plot the 6 most correlated words each
# (top_n() ranks by the last column, `correlation`), one facet per word.
ggplot(
  word_cors %>%
    filter(item1 %in% c("moana", "louis", "waikiki", "shopping")) %>%
    group_by(item1) %>%
    top_n(6) %>%
    ungroup() %>%
    mutate(item2 = reorder(item2, correlation)),
  aes(item2, correlation)
) +
  geom_col() +
  facet_wrap(~item1, scales = "free") +
  coord_flip()
Selecting by correlation

# Seed the RNG before drawing so the layout can be reproduced.
set.seed(2016)

# Network of word pairs whose correlation exceeds 0.45; edge transparency
# follows the correlation and labels are repelled to reduce overlap.
ggraph(
  graph_from_data_frame(filter(word_cors, correlation > .45)),
  layout = "fr"
) +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
  geom_node_point(color = "lightblue", size = 5) +
  geom_node_text(aes(label = name), repel = TRUE) +
  theme_void()

LS0tCnRpdGxlOiAiSGlsdG9uIEhhd2FpaWFuIFZpbGxhZ2UgVHJpcGFkdmlzb3IgcmV2aWV3IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgrkvb/nlKhUZXh0IE1pbmluZyB3aXRoIFIgLSBBIFRpZHkgQXBwcm9hY2ggY2gxfmNoNiDliIbmnpBIaWx0b24gSGF3YWlpYW4gVmlsbGFnZSBUcmlwYWR2aXNvciByZXZpZXc8YnI+CgpEYXRh5L6G5rqQOmh0dHBzOi8vZ2l0aHViLmNvbS9zdXNhbmxpMjAxNi9EYXRhLUFuYWx5c2lzLXdpdGgtUi9ibG9iL21hc3Rlci9IaWx0b25fSGF3YWlpYW5fVmlsbGFnZV9XYWlraWtpX0JlYWNoX1Jlc29ydC1Ib25vbHVsdV9PYWh1X0hhd2FpaV9fZW4uY3N2CgoKCiMjIENoMS5UaGUgdGlkeSB0ZXh0IGZvcm1hdAoKYGBge3J9CgpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHJlYWRyKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShzdHJpbmdyKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KHNjYWxlcykKbGlicmFyeShicm9vbSkKbGlicmFyeShwdXJycikKbGlicmFyeSh3aWR5cikKbGlicmFyeShpZ3JhcGgpCmxpYnJhcnkoZ2dyYXBoKQpsaWJyYXJ5KFNub3diYWxsQykKbGlicmFyeSh3b3JkY2xvdWQpCmxpYnJhcnkocmVzaGFwZTIpCmxpYnJhcnkodG9waWNtb2RlbHMpCnRoZW1lX3NldCh0aGVtZV9taW5pbWFsKCkpCgpkZiA8LSByZWFkX2NzdigiSGlsdG9uX0hhd2FpaWFuX1ZpbGxhZ2VfV2Fpa2lraV9CZWFjaF9SZXNvcnQtSG9ub2x1bHVfT2FodV9IYXdhaWlfX2VuLmNzdiIpCmRmIDwtIGRmW2NvbXBsZXRlLmNhc2VzKGRmKSwgXQpkZiRpZCA8LSBjKDE6bnJvdyhkZikpCmRmJHJldmlld19kYXRlIDwtIGFzLkRhdGUoZGYkcmV2aWV3X2RhdGUsIGZvcm1hdCA9ICIlZC0lQi0leSIpCmRpbShkZik7IG1pbihkZiRyZXZpZXdfZGF0ZSk7IG1heChkZiRyZXZpZXdfZGF0ZSkKZGYkcmV2aWV3X2JvZHkgPSBnc3ViKCJbWzpkaWdpdDpdXSIsICIiLCBkZiRyZXZpZXdfYm9keSkgI+WOu+mZpOaVuOWtlyDpgb/lhY3lvozpnaLpl5zoga/lnJblh7rnj77lvojlpJrmlbjlrZcKCnRpZHlfZGYgPC0gZGYgJT4lCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCByZXZpZXdfYm9keSkKCmBgYAoKCgojIyMgCgpgYGB7cn0KZGF0YShzdG9wX3dvcmRzKQoKdGlkeV9kZiA8LSB0aWR5X2RmICU+JQogIGFudGlfam9pbihzdG9wX3dvcmRzKQpgYGAKCuWFqOmDqOipleirluS4reeahOWtl+mguwpgYGB7cn0KdGlkeV9kZiAlPiUKICBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkgCmBgYAoKCgpgYGB7cn0KdGlkeV9kZiAlPiUKICBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkgJT4lCiAgZmlsdGVyKG4gPiA1NTAwKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbikpICsKICBnZW9tX2NvbCgpICsKICB4bGFiKE5VTEwpICsKICBjb29yZF9mbGlwKCkKYGBgCgoKCiMjIENoMi5T
ZW50aW1lbnQgYW5hbHlzaXMKCuS9v+eUqOWtl+WFuG5yYyDmn6XnnIvlnKjoqZXoq5bkuK3lh7rnj77lk6rkuptqb3nnmoTlrZcKYGBge3J9CgpucmNfam95IDwtIGdldF9zZW50aW1lbnRzKCJucmMiKSAlPiUgCiAgZmlsdGVyKHNlbnRpbWVudCA9PSAiam95IikKCnRpZHlfZGYgJT4lCiAgaW5uZXJfam9pbihucmNfam95KSAlPiUKICBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkgI+azqOaEj+eahOm7nuaYr2JlYWNoLGZvb2QsZGlhbW9uZC4uLuS5n+WcqGpveeijoQpgYGAKCuWPr+S7peeci+WHuuS4u+imgeaYr+mXnOaWvOmjr+W6l+eahOaVtOa9lGNsZWFuLOWPi+WWhGZyaWVuZGx5LGhlbHBmdWws55Kw5aKD5oiW5bu656+J55qE576O6bqXcHJldHR5Li4uLuetiTxicj4KCgrljp90aWR5dGV4dOaWh+eroOevhOS+i+aYr+S7peWwj+iqquevh+eroOe3qOiZn+eCunjou7jvvIx56Lu454K65oOF57eS5YiG5pW45L6G5YGa5ZyW77yM6KGo54++5bCP6Kqq5oOF57eS5L6d5pOa5YqH5oOF5ryU6YCy6ICM55Si55Sf55qE6K6K5YyWPGJyPgrkuI3pgY5ob3RlbCByZXZpZXflsLHopoHorormiJDku6V46Lu454K65pel5pyf5LqGCgpgYGB7cn0KI+aXpeacn+W+njIwMDItMDMtMjF+MjAxOC0wOC0wMgpzdW1tYXJ5KHRpZHlfZGYkcmV2aWV3X2RhdGUpIApgYGAKCmBgYHtyfQpzZW50aW1lbnQgPC0gdGlkeV9kZiAlPiUKICBpbm5lcl9qb2luKGdldF9zZW50aW1lbnRzKCJiaW5nIikpICU+JQogIGNvdW50KGlkLHNlbnRpbWVudCxyZXZpZXdfZGF0ZSkgJT4lIAogIHNwcmVhZChzZW50aW1lbnQsIG4sIGZpbGwgPSAwKSAlPiUKICBtdXRhdGUoc2VudGltZW50ID0gcG9zaXRpdmUgLSBuZWdhdGl2ZSkKCiNjb2wgc2VudGltZW5054K657i95YiG5pW4CgpgYGAKCgpgYGB7cn0Kc2VuX2J5eWVhciA8LSBzZW50aW1lbnQgICU+JSBncm91cF9ieSh5ZWFyKHJldmlld19kYXRlKSkgJT4lIAogIHN1bW1hcmlzZSgKICBzZW50aW1lbnQgPSBtZWFuKHNlbnRpbWVudCkpJT4lIAogIGFzLmRhdGEuZnJhbWUoKQpgYGAKCgoKYGBge3J9CnNlbl9ieXllYXIkYHllYXIocmV2aWV3X2RhdGUpYCA9IGFzLmNoYXJhY3RlcihzZW5fYnl5ZWFyJGB5ZWFyKHJldmlld19kYXRlKWApCnNlbl9ieXllYXIkYHllYXIocmV2aWV3X2RhdGUpYCA8LSBhcy5EYXRlKHNlbl9ieXllYXIkYHllYXIocmV2aWV3X2RhdGUpYCxmb3JtYXQgPSAiJVkiKQpgYGAKCgpgYGB7cn0KZ2dwbG90KHNlbl9ieXllYXIsIGFlcyhgeWVhcihyZXZpZXdfZGF0ZSlgLCBzZW50aW1lbnQpKSArCiAgZ2VvbV9saW5lKCkrCiAgc2NhbGVfeF9kYXRlKGRhdGVfYnJlYWtzID0gIjEgeWVhciIsIGRhdGVfbGFiZWxzID0gIiVZIikKYGBgCgrpoa/npLroqZXoq5blubPlnYfmg4Xnt5LliIbmlbjlnKgyMDA25bm05pmC5pyJ5LiL6ZmN6Lao5Yui77yM5Zyo5pyA6L+R55qEMjAxOOeUmuiHs+aYr+acgOS9jum7ngoKCgojIyMgbW9zdCBjb21tb24gcG9zaXRpdmUgYW5kIG5lZ2F0aXZlIHdvcmRzCgoKYGBge3J9
CmJpbmdfd29yZF9jb3VudHMgPC0gdGlkeV9kZiAlPiUKICBpbm5lcl9qb2luKGdldF9zZW50aW1lbnRzKCJiaW5nIikpICU+JQogIGNvdW50KHdvcmQsIHNlbnRpbWVudCwgc29ydCA9IFRSVUUpICU+JQogIHVuZ3JvdXAoKQoKYmluZ193b3JkX2NvdW50cwpgYGAKCgpgYGB7cn0KYmluZ193b3JkX2NvdW50cyAlPiUKICBncm91cF9ieShzZW50aW1lbnQpICU+JQogIHRvcF9uKDEwKSAlPiUKICB1bmdyb3VwKCkgJT4lCiAgbXV0YXRlKHdvcmQgPSByZW9yZGVyKHdvcmQsIG4pKSAlPiUKICBnZ3Bsb3QoYWVzKHdvcmQsIG4sIGZpbGwgPSBzZW50aW1lbnQpKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGZhY2V0X3dyYXAofnNlbnRpbWVudCwgc2NhbGVzID0gImZyZWVfeSIpICsKICBsYWJzKHkgPSAiQ29udHJpYnV0aW9uIHRvIHNlbnRpbWVudCIsCiAgICAgICB4ID0gTlVMTCkgKwogIGNvb3JkX2ZsaXAoKQpgYGAKCiMjIyB3b3JkIGNsb3VkCgpgYGB7cn0KdGlkeV9kZiAlPiUKICBhbnRpX2pvaW4oc3RvcF93b3JkcykgJT4lCiAgY291bnQod29yZCkgJT4lCiAgd2l0aCh3b3JkY2xvdWQod29yZCwgbiwgbWF4LndvcmRzID0gMTAwKSkKYGBgCgoKYGBge3J9CnRpZHlfZGYgJT4lCiAgaW5uZXJfam9pbihnZXRfc2VudGltZW50cygiYmluZyIpKSAlPiUKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKSAlPiUKICBhY2FzdCh3b3JkIH4gc2VudGltZW50LCB2YWx1ZS52YXIgPSAibiIsIGZpbGwgPSAwKSAlPiUKICBjb21wYXJpc29uLmNsb3VkKGNvbG9ycyA9IGMoImdyYXkyMCIsICJncmF5ODAiKSwKICAgICAgICAgICAgICAgICAgIG1heC53b3JkcyA9IDEwMCkKYGBgCgojIyBDaDMuQW5hbHl6aW5nIHdvcmQgYW5kIGRvY3VtZW50IGZyZXF1ZW5jeTogdGYtaWRmCgpgYGB7cn0KI+eci+eci+acquWOu+mZpHN0b3Agd29yZHPliY3nmoR0ZXJtIGZyZXF1ZW5jeQpkZl93b3JkcyA8LSBkZiAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIHJldmlld19ib2R5KSAlPiUKICBjb3VudChpZCwgd29yZCwgc29ydCA9IFRSVUUpICU+JQogIHVuZ3JvdXAoKQoKdG90YWxfd29yZHMgPC0gZGZfd29yZHMgJT4lIAogIGdyb3VwX2J5KGlkKSAlPiUgCiAgc3VtbWFyaXplKHRvdGFsID0gc3VtKG4pKQoKYm9va193b3JkcyA8LSBsZWZ0X2pvaW4oZGZfd29yZHMsIHRvdGFsX3dvcmRzKQoKYm9va193b3JkcwpgYGAKCm7mmK93b3Jk6Kme6aC7LHRvdGFs5pivaWQgcmV2aWV355qE57i95a2X5pW4CgoKIyMjIFppcGYncyBsYXcKCmBgYHtyfQpmcmVxX2J5X3JhbmsgPC0gYm9va193b3JkcyAlPiUgCiAgZ3JvdXBfYnkoaWQpICU+JSAKICBtdXRhdGUocmFuayA9IHJvd19udW1iZXIoKSwgCiAgICAgICAgIGB0ZXJtIGZyZXF1ZW5jeWAgPSBuL3RvdGFsKQoKZnJlcV9ieV9yYW5rCmBgYAoK5Lul5LiA56+H6KmV6KuW55W25L2c5LiA56+H5paH5pys5L6G55yL77yM5bi46KaL55qEc3RvcCB3b3Jkc+W5vuS5jumDveaYr3Jh
bmvliY3lub7lkI0o55u46LyD5YW25LuW5a2X6Kme77yM5Ye654++6aC7546H55qEcmFuaykKCgoKYGBge3J9CnJhbmtfc3Vic2V0IDwtIGZyZXFfYnlfcmFuayAlPiUgCiAgZmlsdGVyKHJhbmsgPCA1MDAsCiAgICAgICAgIHJhbmsgPiAxMCkKCmxtKGxvZzEwKGB0ZXJtIGZyZXF1ZW5jeWApIH4gbG9nMTAocmFuayksIGRhdGEgPSByYW5rX3N1YnNldCkKYGBgCgoKCiMjIyBiaW5kIHRmLWlkZiBmdW5jdGluCgpgYGB7cn0KYm9va193b3JkcyA8LSBib29rX3dvcmRzICU+JQogIGJpbmRfdGZfaWRmKHdvcmQsaWQsIG4pCmJvb2tfd29yZHMKYGBgCgpgYGB7cn0KYm9va193b3JkcyAlPiUKICAjc2VsZWN0KC10b3RhbCkgJT4lCiAgYXJyYW5nZShkZXNjKHRmX2lkZikpCmBgYAoKCgpgYGB7cn0KYm9va193b3JkcyAlPiUKICBhcnJhbmdlKGRlc2ModGZfaWRmKSkgJT4lCiAgbXV0YXRlKHdvcmQgPSBmYWN0b3Iod29yZCwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkKSkpKSAlPiUgCiAgdG9wX24oMTUpICU+JSAKICB1bmdyb3VwICU+JQogIGdncGxvdChhZXMod29yZCwgdGZfaWRmKSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gInRmLWlkZiIpICsKICBjb29yZF9mbGlwKCkKYGBgCgoKIyMgQ2g0LlJlbGF0aW9uc2hpcHMgYmV0d2VlbiB3b3Jkczogbi1ncmFtcyBhbmQgY29ycmVsYXRpb25zCgpgYGB7cn0KZGZfYmlncmFtcyA8LSBkZiAlPiUKICB1bm5lc3RfdG9rZW5zKGJpZ3JhbSwgcmV2aWV3X2JvZHksIHRva2VuID0gIm5ncmFtcyIsIG4gPSAyKQoKZGZfYmlncmFtcwpgYGAKCmBgYHtyfQpkZl9iaWdyYW1zICU+JQogIGNvdW50KGJpZ3JhbSwgc29ydCA9IFRSVUUpCmBgYAoKYGBge3J9CmJpZ3JhbXNfc2VwYXJhdGVkIDwtIGRmX2JpZ3JhbXMgJT4lCiAgc2VwYXJhdGUoYmlncmFtLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpCgpiaWdyYW1zX2ZpbHRlcmVkIDwtIGJpZ3JhbXNfc2VwYXJhdGVkICU+JQogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpCgojIG5ldyBiaWdyYW0gY291bnRzOgpiaWdyYW1fY291bnRzIDwtIGJpZ3JhbXNfZmlsdGVyZWQgJT4lIAogIGNvdW50KHdvcmQxLCB3b3JkMiwgc29ydCA9IFRSVUUpCgpiaWdyYW1fY291bnRzCmBgYAoKCmBgYHtyfQpiaWdyYW1zX3VuaXRlZCA8LSBiaWdyYW1zX2ZpbHRlcmVkICU+JQogIHVuaXRlKGJpZ3JhbSwgd29yZDEsIHdvcmQyLCBzZXAgPSAiICIpCgpiaWdyYW1zX3VuaXRlZApgYGAKCmBgYHtyfQpkZiAlPiUKICB1bm5lc3RfdG9rZW5zKHRyaWdyYW0scmV2aWV3X2JvZHksIHRva2VuID0gIm5ncmFtcyIsIG4gPSAzKSAlPiUKICBzZXBhcmF0ZSh0cmlncmFtLCBjKCJ3b3JkMSIsICJ3b3JkMiIsICJ3b3JkMyIpLCBzZXAgPSAiICIpICU+JQogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQsCiAg
ICAgICAgICF3b3JkMiAlaW4lIHN0b3Bfd29yZHMkd29yZCwKICAgICAgICAgIXdvcmQzICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUKICBjb3VudCh3b3JkMSwgd29yZDIsIHdvcmQzLCBzb3J0ID0gVFJVRSkKYGBgCgpgYGB7cn0KYmlncmFtX3RmX2lkZiA8LSBiaWdyYW1zX3VuaXRlZCAlPiUKICBjb3VudChpZCwgYmlncmFtKSAlPiUKICBiaW5kX3RmX2lkZihiaWdyYW0sIGlkLCBuKSAlPiUKICBhcnJhbmdlKGRlc2ModGZfaWRmKSkKCmJpZ3JhbV90Zl9pZGYKYGBgCgrmjpLlnKhub3TlvoznmoTlrZfoqZ4gCgpgYGB7cn0KQUZJTk4gPC0gZ2V0X3NlbnRpbWVudHMoImFmaW5uIikKCm5vdF93b3JkcyA8LSBiaWdyYW1zX3NlcGFyYXRlZCAlPiUKICBmaWx0ZXIod29yZDEgPT0gIm5vdCIpICU+JQogIGlubmVyX2pvaW4oQUZJTk4sIGJ5ID0gYyh3b3JkMiA9ICJ3b3JkIikpICU+JQogIGNvdW50KHdvcmQyLCBzY29yZSwgc29ydCA9IFRSVUUpICU+JQogIHVuZ3JvdXAoKQoKbm90X3dvcmRzCmBgYAoKCmBgYHtyfQpub3Rfd29yZHMgJT4lCiAgbXV0YXRlKGNvbnRyaWJ1dGlvbiA9IG4gKiBzY29yZSkgJT4lCiAgYXJyYW5nZShkZXNjKGFicyhjb250cmlidXRpb24pKSkgJT4lCiAgaGVhZCgyMCkgJT4lCiAgbXV0YXRlKHdvcmQyID0gcmVvcmRlcih3b3JkMiwgY29udHJpYnV0aW9uKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkMiwgbiAqIHNjb3JlLCBmaWxsID0gbiAqIHNjb3JlID4gMCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgeGxhYigiV29yZHMgcHJlY2VkZWQgYnkgXCJub3RcIiIpICsKICB5bGFiKCJTZW50aW1lbnQgc2NvcmUgKiBudW1iZXIgb2Ygb2NjdXJyZW5jZXMiKSArCiAgY29vcmRfZmxpcCgpCmBgYAoK5LiN5Y+qbm905Luj6KGo5ZCm5a6a77yM5Yqg5YWl5YW25LuW5ZCm5a6a5a2X6KmeCmBgYHtyfQpuZWdhdGlvbl93b3JkcyA8LSBjKCJub3QiLCAibm8iLCAibmV2ZXIiLCAid2l0aG91dCIpCgpuZWdhdGVkX3dvcmRzIDwtIGJpZ3JhbXNfc2VwYXJhdGVkICU+JQogIGZpbHRlcih3b3JkMSAlaW4lIG5lZ2F0aW9uX3dvcmRzKSAlPiUKICBpbm5lcl9qb2luKEFGSU5OLCBieSA9IGMod29yZDIgPSAid29yZCIpKSAlPiUKICBjb3VudCh3b3JkMSwgd29yZDIsIHNjb3JlLCBzb3J0ID0gVFJVRSkgJT4lCiAgdW5ncm91cCgpCmBgYAoKYGBge3J9Cm5lZ2F0ZWRfd29yZHMgJT4lCiAgbXV0YXRlKGNvbnRyaWJ1dGlvbiA9IG4gKiBzY29yZSkgJT4lCiAgYXJyYW5nZShkZXNjKGFicyhjb250cmlidXRpb24pKSkgJT4lCiAgaGVhZCgyMCkgJT4lCiAgbXV0YXRlKHdvcmQyID0gcmVvcmRlcih3b3JkMiwgY29udHJpYnV0aW9uKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkMiwgbiAqIHNjb3JlLCBmaWxsID0gbiAqIHNjb3JlID4gMCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh+d29yZDEsIHNjYWxlcyA9ICJmcmVlX3kiKSArICAKICB4bGFiKCJXb3JkcyBwcmVj
ZWRlZCBieSBcIm5lZ2F0ZWQgd29yZHNcIiIpICsKICB5bGFiKCJTZW50aW1lbnQgc2NvcmUgKiBudW1iZXIgb2Ygb2NjdXJyZW5jZXMiKSArCiAgY29vcmRfZmxpcCgpCmBgYAoKCmBgYHtyfQojc2F2ZS5pbWFnZSgidGlkeXRleHRfaGF3YWkuUkRhdGEiKQpgYGAKCgojIyMgVmlzdWFsaXppbmcgYSBuZXR3b3JrIG9mIGJpZ3JhbXMgd2l0aCBnZ3JhcGgKCmBgYHtyfQpiaWdyYW1fZ3JhcGggPC0gYmlncmFtX2NvdW50cyAlPiUKICBmaWx0ZXIobiA+IDkwKSAlPiUKICBncmFwaF9mcm9tX2RhdGFfZnJhbWUoKQoKYmlncmFtX2dyYXBoCmBgYAoKCmBgYHtyfQpsaWJyYXJ5KGdncmFwaCkKc2V0LnNlZWQoMjAxNykKCiMg5YmN6JmV55CG6YKE6aCI5oqK5pW45a2X5Y675o6JCmdncmFwaChiaWdyYW1fZ3JhcGgsIGxheW91dCA9ICJmciIpICsgCiAgZ2VvbV9lZGdlX2xpbmsoKSArCiAgZ2VvbV9ub2RlX3BvaW50KCkgKwogIGdlb21fbm9kZV90ZXh0KGFlcyhsYWJlbCA9IG5hbWUpLCB2anVzdCA9IDEsIGhqdXN0ID0gMSkKYGBgCgpgYGB7cn0Kc2V0LnNlZWQoMjAxNikKCmEgPC0gZ3JpZDo6YXJyb3codHlwZSA9ICJjbG9zZWQiLCBsZW5ndGggPSB1bml0KC4xNSwgImluY2hlcyIpKQoKZ2dyYXBoKGJpZ3JhbV9ncmFwaCwgbGF5b3V0ID0gImZyIikgKwogIGdlb21fZWRnZV9saW5rKGFlcyhlZGdlX2FscGhhID0gbiksIHNob3cubGVnZW5kID0gRkFMU0UsCiAgICAgICAgICAgICAgICAgYXJyb3cgPSBhLCBlbmRfY2FwID0gY2lyY2xlKC4wNywgJ2luY2hlcycpKSArCiAgZ2VvbV9ub2RlX3BvaW50KGNvbG9yID0gImxpZ2h0Ymx1ZSIsIHNpemUgPSA1KSArCiAgZ2VvbV9ub2RlX3RleHQoYWVzKGxhYmVsID0gbmFtZSksIHZqdXN0ID0gMSwgaGp1c3QgPSAxKSArCiAgdGhlbWVfdm9pZCgpCmBgYAoKCgojIyMgQ291bnRpbmcgYW5kIGNvcnJlbGF0aW5nIGFtb25nIHJldmlld3MKCmBgYHtyfQpsaWJyYXJ5KHdpZHlyKQoKIyBjb3VudCB3b3JkcyBjby1vY2N1cmluZyB3aXRoaW4gc2VjdGlvbnMKd29yZF9wYWlycyA8LSB0aWR5X2RmICU+JQogIHBhaXJ3aXNlX2NvdW50KHdvcmQsIGlkLCBzb3J0ID0gVFJVRSkKCndvcmRfcGFpcnMKYGBgCgrnnIvlh7rlnKjmr4/kuIDliYfoqZXoq5bkuK3vvIzmnIDluLjkuIDotbflh7rnj77nmoTlhanlgIvlrZc8YnI+CgoK5Lmf5Y+v5Lul5p+l55yL5p+Q5LiA5Zau5a2X5pyA5bi45ZKM6Kqw5LiA6LW35Ye654++CgpgYGB7cn0Kd29yZF9wYWlycyAlPiUKICBmaWx0ZXIoaXRlbTEgPT0gInBvb2wiKQpgYGAKCiMjIyBwYWlyd2lzZSBjb3JyZWxhdGlvbgoKYGBge3J9CndvcmRfY29ycyA8LSB0aWR5X2RmICU+JQogIGdyb3VwX2J5KHdvcmQpICU+JQogIGZpbHRlcihuKCkgPj0gMjApICU+JQogIHBhaXJ3aXNlX2Nvcih3b3JkLCBpZCwgc29ydCA9IFRSVUUpCgp3b3JkX2NvcnMKYGBgCgoK5p+l55yLbW9hbmHpgJnlgIvllq7lrZfmnIDluLjlkozoqrDkuIDotbflh7rnj74KYGBge3J9CndvcmRfY29ycyAlPiUK
ICBmaWx0ZXIoaXRlbTEgPT0gIm1vYW5hIikKYGBgCgoK5Lul6ZW35qKd5ZyW5o6S5bqPIOS4puaOkuWbm+WAi+Wtl+WSjOWFtuS7luWtl+eahGNvcnJlbGF0aW9uCmBgYHtyfQp3b3JkX2NvcnMgJT4lCiAgZmlsdGVyKGl0ZW0xICVpbiUgYygibW9hbmEiLCAibG91aXMiLCAid2Fpa2lraSIsICJzaG9wcGluZyIpKSAlPiUKICBncm91cF9ieShpdGVtMSkgJT4lCiAgdG9wX24oNikgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZShpdGVtMiA9IHJlb3JkZXIoaXRlbTIsIGNvcnJlbGF0aW9uKSkgJT4lCiAgZ2dwbG90KGFlcyhpdGVtMiwgY29ycmVsYXRpb24pKSArCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpICsKICBmYWNldF93cmFwKH4gaXRlbTEsIHNjYWxlcyA9ICJmcmVlIikgKwogIGNvb3JkX2ZsaXAoKQpgYGAKCgpgYGB7cn0Kc2V0LnNlZWQoMjAxNikKCndvcmRfY29ycyAlPiUKICBmaWx0ZXIoY29ycmVsYXRpb24gPiAuNDUpICU+JQogIGdyYXBoX2Zyb21fZGF0YV9mcmFtZSgpICU+JQogIGdncmFwaChsYXlvdXQgPSAiZnIiKSArCiAgZ2VvbV9lZGdlX2xpbmsoYWVzKGVkZ2VfYWxwaGEgPSBjb3JyZWxhdGlvbiksIHNob3cubGVnZW5kID0gRkFMU0UpICsKICBnZW9tX25vZGVfcG9pbnQoY29sb3IgPSAibGlnaHRibHVlIiwgc2l6ZSA9IDUpICsKICBnZW9tX25vZGVfdGV4dChhZXMobGFiZWwgPSBuYW1lKSwgcmVwZWwgPSBUUlVFKSArCiAgdGhlbWVfdm9pZCgpCmBgYAoKCiMjIENoNS5Db252ZXJ0aW5nIHRvIGFuZCBmcm9tIG5vbi10aWR5IGZvcm1hdHMKCiMjIyBDYXN0aW5nIHRpZHkgdGV4dCBkYXRhIGludG8gYSBtYXRyaXgKCmBgYHtyfQpkZl9kdG0gPC0gdGlkeV9kZiAlPiUKICBjb3VudChpZCwgd29yZCkgJT4lCiAgY2FzdF9kdG0oaWQsIHdvcmQsIG4pCgpkZl9kdG0KYGBgCgoo5LiN5aWX55SoYWNx6IiHc3RvY2spCgoKIyMjIyDku6Vsb3VnaHJhbuWwh+aDhee3kuWIhuaIkOWFreeorgoKYGBge3J9CnRpZHlfZGYgJT4lCiAgY291bnQod29yZCkgJT4lCiAgaW5uZXJfam9pbihnZXRfc2VudGltZW50cygibG91Z2hyYW4iKSwgYnkgPSAid29yZCIpICU+JQogIGdyb3VwX2J5KHNlbnRpbWVudCkgJT4lCiAgdG9wX24oNSwgbikgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkLCBuKSkgKwogIGdlb21fY29sKCkgKwogIGNvb3JkX2ZsaXAoKSArCiAgZmFjZXRfd3JhcCh+IHNlbnRpbWVudCwgc2NhbGVzID0gImZyZWUiKSArCiAgeWxhYigiRnJlcXVlbmN5IG9mIHRoaXMgd29yZCBpbiB0aGUgcmV2aWV3cyIpCmBgYAoKCiMjIENoNi5Ub3BpYyBtb2RlbGluZwoKCiMjIyBMREEgb24gcmV2aWV3cwoKYGBge3J9CmRmX2xkYSA8LSBMREEoZGZfZHRtLCBrID0gNCwgY29udHJvbCA9IGxpc3Qoc2VlZCA9IDEyMzQpKQpkZl9sZGEKYGBgCgpwZXItdG9waWNjLXBlci13b3JkIHByb2JhYmlsaXR5CgpgYGB7cn0KZGZfdG9waWNzIDwt
IHRpZHkoZGZfbGRhLCBtYXRyaXggPSAiYmV0YSIpCmRmX3RvcGljcwpgYGAKCgpgYGB7cn0KdG9wX3Rlcm1zIDwtIGRmX3RvcGljcyAlPiUKICBncm91cF9ieSh0b3BpYykgJT4lCiAgdG9wX24oNywgYmV0YSkgJT4lCiAgdW5ncm91cCgpICU+JQogIGFycmFuZ2UodG9waWMsIC1iZXRhKQoKdG9wX3Rlcm1zCmBgYAoKYGBge3J9CnRvcF90ZXJtcyAlPiUKICBtdXRhdGUodGVybSA9IHJlb3JkZXIodGVybSwgYmV0YSkpICU+JQogIGdncGxvdChhZXModGVybSwgYmV0YSwgZmlsbCA9IGZhY3Rvcih0b3BpYykpKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGZhY2V0X3dyYXAofiB0b3BpYywgc2NhbGVzID0gImZyZWUiKSArCiAgY29vcmRfZmxpcCgpCmBgYAoKCuaEn+imuueci+S4jeWkquWHuuS+hnJldmlld+WQhOWbm+WAi+S4u+mhjOacieS7gOm6vOaYjumhr+W3ruWIpQoKCiMjIyBQZXItZG9jdW1lbnQgY2xhc3NpZmljYXRpb24KCijlm6DngrroqZXoq5bljp/mnKzlsLHmspLmnInliIbpoZ7vvIzmiYDku6XlsLHmspLmnInot5HpgJnnq6ApCgoKIyMjIEJ5IHdvcmQgYXNzaWdubWVudHM6IGF1Z21lbnQKCmBgYHtyfQphc3NpZ25tZW50cyA8LSBhdWdtZW50KGRmX2xkYSwgZGF0YSA9IGRmX2R0bSkKYXNzaWdubWVudHMKYGBgCgoo5Zug54K66KmV6KuW5Y6f5pys5bCx5rKS5pyJ5YiG6aGe77yM5omA5Lul5rKS5pyJ5ris6Kmm5Li76aGM5pyJ5rKS5pyJ5YiG6aGe6Yyv6KqkKQo=