ch0.套件取得及資料載入
套件
library(data.table)
library(ggplot2)
library(dplyr)
library(jiebaR)
library(tidytext)
library(stringr)
library(tm)
library(topicmodels)
library(purrr)
require(RColorBrewer)
mycolors <- colorRampPalette(brewer.pal(8, "Set3"))(20)
資料描述
透過中山管院文字分析平台,載入聯合新聞網、蘋果新聞網、東森新聞網的新聞,搜尋關鍵字為「疫苗」,時間從2020/10/01到2021/05/09。
metadata <- fread("news_articleMetaData.csv", encoding = "UTF-8")
可以看到疫苗討論在2月過後的新聞報導數量增加
metadata %>%
mutate(artDate = as.Date(artDate)) %>%
group_by(artDate) %>%
summarise(count = n())%>%
ggplot(aes(artDate,count))+
geom_line(color="red")+
geom_point()
#Ch1. Document Term Matrix (DTM)
資料前處理
使用默認參數初始化一個斷詞引擎
jieba_tokenizer = worker()
news_tokenizer <- function(t) {
lapply(t, function(x) {
if(nchar(x)>1){
tokens <- segment(x, jieba_tokenizer)
# 去掉字串長度爲1的詞彙
tokens <- tokens[nchar(tokens)>1]
return(tokens)
}
})
}
計算每篇文章各token出現次數
tokens <- metadata %>%
unnest_tokens(word, sentence, token=news_tokenizer) %>%
filter((!str_detect(word, regex("[0-9a-zA-Z]"))) | str_detect(word, regex("[Aa][Zz]"))) %>%
count(artUrl, word) %>%
rename(count=n)
tokens %>% head(20)
將資料轉換為Document Term Matrix (DTM)
dtm <-tokens %>% cast_dtm(artUrl, word, count)
dtm
inspect(dtm[1:10,1:10])
ch2. 主題模型
建立LDA模型
lda <- LDA(dtm, k = 2, control = list(seed = 2021))
# lda <- LDA(dtm, k = 2, control = list(seed = 2021,alpha = 2,delta=0.1),method = "Gibbs") #調整alpha即delta
lda
利用LDA模型建立phi矩陣
topics_words <- tidy(lda, matrix = "beta") # 注意,在tidy function裡面要使用"beta"來取出Phi矩陣。
colnames(topics_words) <- c("topic", "term", "phi")
topics_words
尋找Topic的代表字
terms依照各主題的phi值由大到小排序,列出前10大
topics_words %>%
group_by(topic) %>%
top_n(10, phi) %>%
ungroup() %>%
mutate(top_words = reorder_within(term,phi,topic)) %>%
ggplot(aes(x = top_words, y = phi, fill = as.factor(topic))) +
geom_col(show.legend = FALSE) +
facet_wrap(~ topic, scales = "free") +
coord_flip() +
scale_x_reordered()
#ch3. 尋找最佳主題數
建立更多主題的主題模型
嘗試2、4、6、10、15個主題數,將結果存起來,再做進一步分析。 此部分需要跑一段時間,已經將跑完的檔案存成ldas_result.rdata,可以直接載入
# ldas = c()
# topics = c(2,4,6,10,15)
# for(topic in topics){
# start_time <- Sys.time()
# lda <- LDA(dtm, k = topic, control = list(seed = 2021))
# ldas =c(ldas,lda)
# print(paste(topic ,paste("topic(s) and use time is ", Sys.time() -start_time)))
# save(ldas,file = "ldas_result.rdata") # 將模型輸出成檔案
# }
載入每個主題的LDA結果
load("ldas_result.rdata")
透過perplexity找到最佳主題數
topics = c(2,4,6,10,15)
data_frame(k = topics, perplex = map_dbl(ldas, topicmodels::perplexity)) %>%
ggplot(aes(k, perplex)) +
geom_point() +
geom_line() +
labs(title = "Evaluating LDA topic models",
subtitle = "Optimal number of topics (smaller is better)",
x = "Number of topics",
y = "Perplexity")
create LDAvis所需的json function 此function是將前面使用 “LDA function”所建立的model,轉換為“LDAVis”套件的input格式。
topicmodels_json_ldavis <- function(fitted, doc_term){
require(LDAvis)
require(slam)
###以下function 用來解決,主題數多會出現NA的問題
### 參考 https://github.com/cpsievert/LDAvis/commit/c7234d71168b1e946a361bc00593bc5c4bf8e57e
ls_LDA = function (phi){
jensenShannon <- function(x, y) {
m <- 0.5 * (x + y)
lhs <- ifelse(x == 0, 0, x * (log(x) - log(m+1e-16)))
rhs <- ifelse(y == 0, 0, y * (log(y) - log(m+1e-16)))
0.5 * sum(lhs) + 0.5 * sum(rhs)
}
dist.mat <- proxy::dist(x = phi, method = jensenShannon)
pca.fit <- stats::cmdscale(dist.mat, k = 2)
data.frame(x = pca.fit[, 1], y = pca.fit[, 2])
}
# Find required quantities
phi <- as.matrix(posterior(fitted)$terms)
theta <- as.matrix(posterior(fitted)$topics)
vocab <- colnames(phi)
term_freq <- slam::col_sums(doc_term)
# Convert to json
json_lda <- LDAvis::createJSON(phi = phi, theta = theta,
vocab = vocab,
doc.length = as.vector(table(doc_term$i)),
term.frequency = term_freq, mds.method = ls_LDA)
return(json_lda)
}
產生LDAvis結果
the_lda = ldas[[2]]
json_res <- topicmodels_json_ldavis(the_lda,dtm)
serVis(json_res,open.browser = T)
產生LDAvis檔案,存至local端
serVis(json_res, out.dir = "vis", open.browser = T)
writeLines(iconv(readLines("./vis/lda.json"), to = "UTF8"))
ch4. LDA分析
選定4個主題數的主題模型
the_lda = ldas[[2]] ## 選定topic 為 4 的結果
topics_words <- tidy(the_lda, matrix = "beta") # 注意,在tidy function裡面要使用"beta"來取出Phi矩陣。
colnames(topics_words) <- c("topic", "term", "phi")
topics_words %>% arrange(desc(phi)) %>% head(10)
terms依照各主題的phi值由大到小排序
topics_words %>%
group_by(topic) %>%
top_n(10, phi) %>%
ungroup() %>%
ggplot(aes(x = reorder_within(term,phi,topic), y = phi, fill = as.factor(topic))) +
geom_col(show.legend = FALSE) +
facet_wrap(~ topic, scales = "free") +
coord_flip() +
scale_x_reordered()
去除共通詞彙
removed_word = c("肺炎","新冠","疫苗","接種","目前","表示","沒有")
topics_words %>%
filter(!term %in% removed_word) %>%
group_by(topic) %>%
top_n(10, phi) %>%
ungroup() %>%
ggplot(aes(x = reorder_within(term,phi,topic), y = phi, fill = as.factor(topic))) +
geom_col(show.legend = FALSE) +
facet_wrap(~ topic, scales = "free") +
coord_flip() +
scale_x_reordered()
主題命名
topics_name = c("AZ疫苗","台灣疫苗施打","疫苗研發進度","輝瑞疫苗")
Document 主題分佈
# for every document we have a probability distribution of its contained topics
tmResult <- posterior(the_lda)
doc_pro <- tmResult$topics
document_topics <- doc_pro[metadata$artUrl,]
document_topics_df =data.frame(document_topics)
colnames(document_topics_df) = topics_name
rownames(document_topics_df) = NULL
news_topic = cbind(metadata,document_topics_df)
現在我們看每一篇的文章分佈了!
查看特定主題的文章
- 透過找到特定文章的分佈進行排序之後,可以看到此主題的比重高的文章在討論什麼。
news_topic %>%
arrange(desc(`AZ疫苗`)) %>%head(10)
了解主題在時間的變化
news_topic %>%
mutate(artDate = as.Date(artDate)) %>%
group_by(artDate = format(artDate,'%Y%m')) %>%
summarise_if(is.numeric, sum, na.rm = TRUE) %>%
melt(id.vars = "artDate")%>%
ggplot( aes(x=artDate, y=value, fill=variable)) +
geom_bar(stat = "identity") + ylab("value") +
scale_fill_manual(values=mycolors[c(1,5,8,12)])+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
去除筆數少月份
news_topic %>%
mutate(artDate = as.Date(artDate)) %>%
filter( !format(artDate,'%Y%m') %in% c(202011,202105))%>%
group_by(artDate = format(artDate,'%Y%m')) %>%
summarise_if(is.numeric, sum, na.rm = TRUE) %>%
melt(id.vars = "artDate")%>%
ggplot( aes(x=artDate, y=value, fill=variable)) +
geom_bar(stat = "identity") + ylab("value") +
scale_fill_manual(values=mycolors[c(1,5,8,12)])+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
以比例了解主題時間變化
news_topic %>%
mutate(artDate = as.Date(artDate)) %>%
filter( !format(artDate,'%Y%m') %in% c(202011,202105))%>%
group_by(artDate = format(artDate,'%Y%m')) %>%
summarise_if(is.numeric, sum, na.rm = TRUE) %>%
melt(id.vars = "artDate")%>%
group_by(artDate)%>%
mutate(total_value =sum(value))%>%
ggplot( aes(x=artDate, y=value/total_value, fill=variable)) +
geom_bar(stat = "identity") + ylab("proportion") +
scale_fill_manual(values=mycolors[c(1,5,8,12)])+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
補充 - 不同訓練LDA模型套件
參考 http://text2vec.org/topic_modeling.html#latent_dirichlet_allocation
library(text2vec)
library(udpipe)
tokens <- metadata %>%
unnest_tokens(word, sentence, token=news_tokenizer) %>%
filter(!str_detect(word, regex("[0-9a-zA-Z]"))| str_detect(word, regex("[Aa][Zz]")))
建立DTM matrix
dtf <- document_term_frequencies(tokens, document = "artUrl", term = "word")
dtm <- document_term_matrix(x = dtf)
dtm_clean <- dtm_remove_lowfreq(dtm, minfreq = 30)
dim(dtm_clean)
LDA 模型
set.seed(2019)
topic_n = 4
lda_model =text2vec::LDA$new(n_topics = topic_n,doc_topic_prior = 0.1, topic_word_prior = 0.001)
doc_topic_distr =lda_model$fit_transform(dtm_clean, n_iter = 1000, convergence_tol = 1e-5,check_convergence_every_n = 100)
這個比topicmodels的package跑快超多倍
一樣可以用LDAvis的套件來看
lda_model$get_top_words(n = 10, lambda = 0.5) ## 查看 前10主題字
lda_model$plot()
# lda_model$plot(out.dir ="lda_result", open.browser = TRUE)
LS0tDQp0aXRsZTogIuS9v+eUqOS4u+mhjOaooeWei+WIhuaekOaWsOWGoOiCuueCjueWq+iLl+S4reaWh+aWsOiBnuizh+aWmSINCmF1dGhvcjogIueOi+WTgeWgryINCmRhdGU6ICIyMDIxLzA1LzExIg0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOg0KICAgIGNzczogc3R5bGUuY3NzDQogICAgaGlnaGxpZ2h0OiBweWdtZW50cw0KICAgIHRoZW1lOiBmbGF0bHkNCiAgICB0b2M6IHllcw0KICAgIHRvY19mbG9hdDogeWVzDQogIGh0bWxfZG9jdW1lbnQ6DQogICAgZGZfcHJpbnQ6IHBhZ2VkDQogICAgdG9jOiB5ZXMNCmVkaXRvcl9vcHRpb25zOiANCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGlubGluZQ0KLS0tDQoNCg0KIyBjaDAu5aWX5Lu25Y+W5b6X5Y+K6LOH5paZ6LyJ5YWlDQojIyDlpZfku7YNCmBgYHtyfQ0KbGlicmFyeShkYXRhLnRhYmxlKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoamllYmFSKQ0KbGlicmFyeSh0aWR5dGV4dCkNCmxpYnJhcnkoc3RyaW5ncikNCmxpYnJhcnkodG0pDQpsaWJyYXJ5KHRvcGljbW9kZWxzKQ0KbGlicmFyeShwdXJycikNCnJlcXVpcmUoUkNvbG9yQnJld2VyKQ0KbXljb2xvcnMgPC0gY29sb3JSYW1wUGFsZXR0ZShicmV3ZXIucGFsKDgsICJTZXQzIikpKDIwKQ0KYGBgDQoNCg0KIyMg6LOH5paZ5o+P6L+wDQoNCj4g6YCP6YGO5Lit5bGx566h6Zmi5paH5a2X5YiG5p6Q5bmz5Y+w77yM6LyJ5YWl6IGv5ZCI5paw6IGe57ay44CB6JiL5p6c5paw6IGe57ay44CB5p2x5qOu5paw6IGe57ay55qE5paw6IGe77yM5pCc5bCL6Zec6Y215a2X54K644CM55ar6IuX44CN77yM5pmC6ZaT5b6eMjAyMC8xMC8wMeWIsDIwMjEvMDUvMDnjgIINCg0KYGBge3J9DQptZXRhZGF0YSA8LSBmcmVhZCgibmV3c19hcnRpY2xlTWV0YURhdGEuY3N2IiwgZW5jb2RpbmcgPSAiVVRGLTgiKQ0KYGBgDQoNCj4g5Y+v5Lul55yL5Yiw55ar6IuX6KiO6KuW5ZyoMuaciOmBjuW+jOeahOaWsOiBnuWgseWwjuaVuOmHj+WinuWKoA0KDQpgYGB7cn0NCm1ldGFkYXRhICU+JSANCiAgbXV0YXRlKGFydERhdGUgPSBhcy5EYXRlKGFydERhdGUpKSAlPiUNCiAgZ3JvdXBfYnkoYXJ0RGF0ZSkgJT4lDQogIHN1bW1hcmlzZShjb3VudCA9IG4oKSklPiUNCiAgZ2dwbG90KGFlcyhhcnREYXRlLGNvdW50KSkrDQogICAgZ2VvbV9saW5lKGNvbG9yPSJyZWQiKSsNCiAgICBnZW9tX3BvaW50KCkNCmBgYA0KDQojQ2gxLiBEb2N1bWVudCBUZXJtIE1hdHJpeCAoRFRNKQ0KDQojIyDos4fmlpnliY3omZXnkIYNCg0KPuS9v+eUqOm7mOiqjeWPg+aVuOWIneWni+WMluS4gOWAi+aWt+ipnuW8leaTjg0KDQpgYGB7cn0NCmppZWJhX3Rva2VuaXplciA9IHdvcmtlcigpDQpuZXdzX3Rva2VuaXplciA8LSBmdW5jdGlvbih0KSB7DQogIGxhcHBseSh0LCBmdW5jdGlvbih4KSB7DQogICAgaWYobmNoYXIoeCk+MSl7DQogICAgICB0b2tlbnMgPC0gc2VnbWVudCh4LCBqaWViYV90b2tlbml6ZXIpDQogICAgICAjIOWOu+aOieWtl+S4sumVt+W6pueIsjHnmoToqZ7lvZkNCiAgICAgIHRva2VucyA8LSB0b2tlbnNbbmNoYXIodG9rZW5zKT4xXQ0KICAgICAgcmV0dXJuKHRva2VucykNCiAgICB9DQogIH0pDQp9DQpgYGANCg0KPiDoqIjnrpfmr4/nr4fmlofnq6DlkIR0b2tlbuWHuuePvuasoeaVuA0KDQpgYGB7cn0NCnRva2VucyA8LSBtZXRhZGF0YSAlPiUNCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCBzZW50ZW5jZSwgdG9rZW49bmV3c190b2tlbml6ZXIpICU+JQ0KICBmaWx0ZXIoKCFzdHJfZGV0ZWN0KHdvcmQsIHJlZ2V4KCJbMC05YS16QS1aXSIpKSkgfCBzdHJfZGV0ZWN0KHdvcmQsIHJlZ2V4KCJbQWFdW1p6XSIpKSkgJT4lDQogIGNvdW50KGFydFVybCwgd29yZCkgJT4lDQogIHJlbmFtZShjb3VudD1uKQ0KdG9rZW5zICU+JSBoZWFkKDIwKQ0KYGBgDQoNCiMjIOWwh+izh+aWmei9ieaPm+eCukRvY3VtZW50IFRlcm0gTWF0cml4IChEVE0pDQoNCmBgYHtyfQ0KZHRtIDwtdG9rZW5zICU+JSBjYXN0X2R0bShhcnRVcmwsIHdvcmQsIGNvdW50KQ0KZHRtDQppbnNwZWN0KGR0bVsxOjEwLDE6MTBdKQ0KYGBgDQojIGNoMi4g5Li76aGM5qih5Z6LDQoNCiMjIOW7uueri0xEQeaooeWeiw0KDQpgYGB7cn0NCmxkYSA8LSBMREEoZHRtLCBrID0gMiwgY29udHJvbCA9IGxpc3Qoc2VlZCA9IDIwMjEpKQ0KIyBsZGEgPC0gTERBKGR0bSwgayA9IDIsIGNvbnRyb2wgPSBsaXN0KHNlZWQgPSAyMDIxLGFscGhhID0gMixkZWx0YT0wLjEpLG1ldGhvZCA9ICJHaWJicyIpICPoqr/mlbRhbHBoYeWNs2RlbHRhDQpsZGENCmBgYA0KDQojIyDliKnnlKhMREHmqKHlnovlu7rnq4twaGnnn6npmaMNCmBgYHtyfQ0KdG9waWNzX3dvcmRzIDwtIHRpZHkobGRhLCBtYXRyaXggPSAiYmV0YSIpICMg5rOo5oSP77yM5ZyodGlkeSBmdW5jdGlvbuijoemdouimgeS9v+eUqCJiZXRhIuS+huWPluWHulBoaeefqemZo+OAgg0KY29sbmFtZXModG9waWNzX3dvcmRzKSA8LSBjKCJ0b3BpYyIsICJ0ZXJtIiwgInBoaSIpDQp0b3BpY3Nfd29yZHMNCmBgYA0KDQojIyDlsIvmib5Ub3BpY+eahOS7o+ihqOWtlw0KDQo+IHRlcm1z5L6d54Wn5ZCE5Li76aGM55qEcGhp5YC855Sx5aSn5Yiw5bCP5o6S5bqP77yM5YiX5Ye65YmNMTDlpKcNCg0KYGBge3J9DQp0b3BpY3Nfd29yZHMgJT4lDQogIGdyb3VwX2J5KHRvcGljKSAlPiUNCiAgdG9wX24oMTAsIHBoaSkgJT4lDQogIHVuZ3JvdXAoKSAlPiUNCiAgbXV0YXRlKHRvcF93b3JkcyA9IHJlb3JkZXJfd2l0aGluKHRlcm0scGhpLHRvcGljKSkgJT4lDQogIGdncGxvdChhZXMoeCA9IHRvcF93b3JkcywgeSA9IHBoaSwgZmlsbCA9IGFzLmZhY3Rvcih0b3BpYykpKSArDQogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsNCiAgZmFjZXRfd3JhcCh+IHRvcGljLCBzY2FsZXMgPSAiZnJlZSIpICsNCiAgY29vcmRfZmxpcCgpICsNCiAgc2NhbGVfeF9yZW9yZGVyZWQoKQ0KYGBgDQoNCiNjaDMuIOWwi+aJvuacgOS9s+S4u+mhjOaVuA0KDQojIyDlu7rnq4vmm7TlpJrkuLvpoYznmoTkuLvpoYzmqKHlnosNCg0KPiDlmJfoqaYy44CBNOOAgTbjgIExMOOAgTE15YCL5Li76aGM5pW477yM5bCH57WQ5p6c5a2Y6LW35L6G77yM5YaN5YGa6YCy5LiA5q2l5YiG5p6Q44CCDQrmraTpg6jliIbpnIDopoHot5HkuIDmrrXmmYLplpPvvIzlt7LntpPlsIfot5HlroznmoTmqpTmoYjlrZjmiJBsZGFzX3Jlc3VsdC5yZGF0Ye+8jOWPr+S7peebtOaOpei8ieWFpQ0KDQpgYGB7ciBldmFsPUZBTFNFfQ0KIyBsZGFzID0gYygpDQojIHRvcGljcyA9IGMoMiw0LDYsMTAsMTUpDQojIGZvcih0b3BpYyBpbiB0b3BpY3Mpew0KIyAgIHN0YXJ0X3RpbWUgPC0gU3lzLnRpbWUoKQ0KIyAgIGxkYSA8LSBMREEoZHRtLCBrID0gdG9waWMsIGNvbnRyb2wgPSBsaXN0KHNlZWQgPSAyMDIxKSkNCiMgICBsZGFzID1jKGxkYXMsbGRhKQ0KIyAgIHByaW50KHBhc3RlKHRvcGljICxwYXN0ZSgidG9waWMocykgYW5kIHVzZSB0aW1lIGlzICIsIFN5cy50aW1lKCkgLXN0YXJ0X3RpbWUpKSkNCiMgICBzYXZlKGxkYXMsZmlsZSA9ICJsZGFzX3Jlc3VsdC5yZGF0YSIpICMg5bCH5qih5Z6L6Ly45Ye65oiQ5qqU5qGIDQojIH0NCmBgYA0KDQo+IOi8ieWFpeavj+WAi+S4u+mhjOeahExEQee1kOaenA0KDQpgYGB7cn0NCmxvYWQoImxkYXNfcmVzdWx0LnJkYXRhIikNCmBgYA0KDQojIyDpgI/pgY5wZXJwbGV4aXR55om+5Yiw5pyA5L2z5Li76aGM5pW4DQpgYGB7cn0NCnRvcGljcyA9IGMoMiw0LDYsMTAsMTUpDQpkYXRhX2ZyYW1lKGsgPSB0b3BpY3MsIHBlcnBsZXggPSBtYXBfZGJsKGxkYXMsIHRvcGljbW9kZWxzOjpwZXJwbGV4aXR5KSkgJT4lDQogIGdncGxvdChhZXMoaywgcGVycGxleCkpICsNCiAgZ2VvbV9wb2ludCgpICsNCiAgZ2VvbV9saW5lKCkgKw0KICBsYWJzKHRpdGxlID0gIkV2YWx1YXRpbmcgTERBIHRvcGljIG1vZGVscyIsDQogICAgICAgc3VidGl0bGUgPSAiT3B0aW1hbCBudW1iZXIgb2YgdG9waWNzIChzbWFsbGVyIGlzIGJldHRlcikiLA0KICAgICAgIHggPSAiTnVtYmVyIG9mIHRvcGljcyIsDQogICAgICAgeSA9ICJQZXJwbGV4aXR5IikNCmBgYA0KDQoNCj4gY3JlYXRlIExEQXZpc+aJgOmcgOeahGpzb24gZnVuY3Rpb24NCuatpGZ1bmN0aW9u5piv5bCH5YmN6Z2i5L2/55SoICJMREEgZnVuY3Rpb24i5omA5bu656uL55qEbW9kZWzvvIzovYnmj5vngroiTERBVmlzIuWll+S7tueahGlucHV05qC85byP44CCDQoNCmBgYHtyfQ0KDQp0b3BpY21vZGVsc19qc29uX2xkYXZpcyA8LSBmdW5jdGlvbihmaXR0ZWQsIGRvY190ZXJtKXsNCiAgICByZXF1aXJlKExEQXZpcykNCiAgICByZXF1aXJlKHNsYW0pDQogIA0KICAgICMjI+S7peS4i2Z1bmN0aW9uIOeUqOS+huino+axuu+8jOS4u+mhjOaVuOWkmuacg+WHuuePvk5B55qE5ZWP6aGMDQogICAgIyMjIOWPg+iAgyBodHRwczovL2dpdGh1Yi5jb20vY3BzaWV2ZXJ0L0xEQXZpcy9jb21taXQvYzcyMzRkNzExNjhiMWU5NDZhMzYxYmMwMDU5M2JjNWM0YmY4ZTU3ZQ0KICAgIGxzX0xEQSA9IGZ1bmN0aW9uIChwaGkpew0KICAgICAgamVuc2VuU2hhbm5vbiA8LSBmdW5jdGlvbih4LCB5KSB7DQogICAgICAgIG0gPC0gMC41ICogKHggKyB5KQ0KICAgICAgICBsaHMgPC0gaWZlbHNlKHggPT0gMCwgMCwgeCAqIChsb2coeCkgLSBsb2cobSsxZS0xNikpKQ0KICAgICAgICByaHMgPC0gaWZlbHNlKHkgPT0gMCwgMCwgeSAqIChsb2coeSkgLSBsb2cobSsxZS0xNikpKQ0KICAgICAgICAwLjUgKiBzdW0obGhzKSArIDAuNSAqIHN1bShyaHMpDQogICAgICB9DQogICAgICBkaXN0Lm1hdCA8LSBwcm94eTo6ZGlzdCh4ID0gcGhpLCBtZXRob2QgPSBqZW5zZW5TaGFubm9uKQ0KICAgICAgcGNhLmZpdCA8LSBzdGF0czo6Y21kc2NhbGUoZGlzdC5tYXQsIGsgPSAyKQ0KICAgICAgZGF0YS5mcmFtZSh4ID0gcGNhLmZpdFssIDFdLCB5ID0gcGNhLmZpdFssIDJdKQ0KICAgIH0NCiAgDQogICAgICAjIEZpbmQgcmVxdWlyZWQgcXVhbnRpdGllcw0KICAgICAgcGhpIDwtIGFzLm1hdHJpeChwb3N0ZXJpb3IoZml0dGVkKSR0ZXJtcykNCiAgICAgIHRoZXRhIDwtIGFzLm1hdHJpeChwb3N0ZXJpb3IoZml0dGVkKSR0b3BpY3MpDQogICAgICB2b2NhYiA8LSBjb2xuYW1lcyhwaGkpDQogICAgICB0ZXJtX2ZyZXEgPC0gc2xhbTo6Y29sX3N1bXMoZG9jX3Rlcm0pDQogIA0KICAgICAgIyBDb252ZXJ0IHRvIGpzb24NCiAgICAgIGpzb25fbGRhIDwtIExEQXZpczo6Y3JlYXRlSlNPTihwaGkgPSBwaGksIHRoZXRhID0gdGhldGEsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdm9jYWIgPSB2b2NhYiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkb2MubGVuZ3RoID0gYXMudmVjdG9yKHRhYmxlKGRvY190ZXJtJGkpKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0ZXJtLmZyZXF1ZW5jeSA9IHRlcm1fZnJlcSwgbWRzLm1ldGhvZCA9IGxzX0xEQSkNCiAgDQogICAgICByZXR1cm4oanNvbl9sZGEpDQp9DQpgYGANCg0KIyMg55Si55SfTERBdmlz57WQ5p6cDQoNCmBgYHtyIGV2YWw9RkFMU0V9DQoNCnRoZV9sZGEgPSBsZGFzW1syXV0NCmpzb25fcmVzIDwtIHRvcGljbW9kZWxzX2pzb25fbGRhdmlzKHRoZV9sZGEsZHRtKQ0Kc2VyVmlzKGpzb25fcmVzLG9wZW4uYnJvd3NlciA9IFQpDQpgYGANCg0KIyMjIOeUoueUn0xEQXZpc+aqlOahiO+8jOWtmOiHs2xvY2Fs56uvDQpgYGB7ciBldmFsPUZBTFNFfQ0Kc2VyVmlzKGpzb25fcmVzLCBvdXQuZGlyID0gInZpcyIsIG9wZW4uYnJvd3NlciA9IFQpDQp3cml0ZUxpbmVzKGljb252KHJlYWRMaW5lcygiLi92aXMvbGRhLmpzb24iKSwgdG8gPSAiVVRGOCIpKQ0KYGBgDQoNCg0KDQojIGNoNC4gTERB5YiG5p6QDQoNCiMjIOmBuOWumjTlgIvkuLvpoYzmlbjnmoTkuLvpoYzmqKHlnosNCmBgYHtyfQ0KdGhlX2xkYSA9IGxkYXNbWzJdXSAjIyDpgbjlrpp0b3BpYyDngrogNCDnmoTntZDmnpwNCmBgYA0KDQpgYGB7cn0NCnRvcGljc193b3JkcyA8LSB0aWR5KHRoZV9sZGEsIG1hdHJpeCA9ICJiZXRhIikgIyDms6jmhI/vvIzlnKh0aWR5IGZ1bmN0aW9u6KOh6Z2i6KaB5L2/55SoImJldGEi5L6G5Y+W5Ye6UGhp55+p6Zmj44CCDQpjb2xuYW1lcyh0b3BpY3Nfd29yZHMpIDwtIGMoInRvcGljIiwgInRlcm0iLCAicGhpIikNCnRvcGljc193b3JkcyAlPiUgYXJyYW5nZShkZXNjKHBoaSkpICU+JSBoZWFkKDEwKQ0KYGBgDQoNCj4gdGVybXPkvp3nhaflkITkuLvpoYznmoRwaGnlgLznlLHlpKfliLDlsI/mjpLluo8NCg0KYGBge3J9DQp0b3BpY3Nfd29yZHMgJT4lDQogIGdyb3VwX2J5KHRvcGljKSAlPiUNCiAgdG9wX24oMTAsIHBoaSkgJT4lDQogIHVuZ3JvdXAoKSAlPiUNCiAgZ2dwbG90KGFlcyh4ID0gcmVvcmRlcl93aXRoaW4odGVybSxwaGksdG9waWMpLCB5ID0gcGhpLCBmaWxsID0gYXMuZmFjdG9yKHRvcGljKSkpICsNCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKw0KICBmYWNldF93cmFwKH4gdG9waWMsIHNjYWxlcyA9ICJmcmVlIikgKw0KICBjb29yZF9mbGlwKCkgKw0KICBzY2FsZV94X3Jlb3JkZXJlZCgpDQpgYGANCg0KPuWOu+mZpOWFsemAmuipnuW9mQ0KDQpgYGB7cn0NCnJlbW92ZWRfd29yZCA9IGMoIuiCuueCjiIsIuaWsOWGoCIsIueWq+iLlyIsIuaOpeeoriIsIuebruWJjSIsIuihqOekuiIsIuaykuaciSIpDQoNCnRvcGljc193b3JkcyAlPiUNCiAgZmlsdGVyKCF0ZXJtICAlaW4lIHJlbW92ZWRfd29yZCkgJT4lDQogIGdyb3VwX2J5KHRvcGljKSAlPiUNCiAgdG9wX24oMTAsIHBoaSkgJT4lDQogIHVuZ3JvdXAoKSAlPiUNCiAgZ2dwbG90KGFlcyh4ID0gcmVvcmRlcl93aXRoaW4odGVybSxwaGksdG9waWMpLCB5ID0gcGhpLCBmaWxsID0gYXMuZmFjdG9yKHRvcGljKSkpICsNCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKw0KICBmYWNldF93cmFwKH4gdG9waWMsIHNjYWxlcyA9ICJmcmVlIikgKw0KICBjb29yZF9mbGlwKCkgKw0KICBzY2FsZV94X3Jlb3JkZXJlZCgpDQpgYGANCg0KIyMjIOS4u+mhjOWRveWQjQ0KYGBge3J9DQp0b3BpY3NfbmFtZSA9IGMoIkFa55ar6IuXIiwi5Y+w54Gj55ar6IuX5pa95omTIiwi55ar6IuX56CU55m86YCy5bqmIiwi6Lyd55Ge55ar6IuXIikNCmBgYA0KDQojIyBEb2N1bWVudCDkuLvpoYzliIbkvYgNCmBgYHtyfQ0KIyBmb3IgZXZlcnkgZG9jdW1lbnQgd2UgaGF2ZSBhIHByb2JhYmlsaXR5IGRpc3RyaWJ1dGlvbiBvZiBpdHMgY29udGFpbmVkIHRvcGljcw0KdG1SZXN1bHQgPC0gcG9zdGVyaW9yKHRoZV9sZGEpDQpkb2NfcHJvIDwtIHRtUmVzdWx0JHRvcGljcw0KZG9jdW1lbnRfdG9waWNzIDwtIGRvY19wcm9bbWV0YWRhdGEkYXJ0VXJsLF0NCmRvY3VtZW50X3RvcGljc19kZiA9ZGF0YS5mcmFtZShkb2N1bWVudF90b3BpY3MpDQpjb2xuYW1lcyhkb2N1bWVudF90b3BpY3NfZGYpID0gdG9waWNzX25hbWUNCnJvd25hbWVzKGRvY3VtZW50X3RvcGljc19kZikgPSBOVUxMDQpuZXdzX3RvcGljID0gY2JpbmQobWV0YWRhdGEsZG9jdW1lbnRfdG9waWNzX2RmKQ0KYGBgDQoNCj4g54++5Zyo5oiR5YCR55yL5q+P5LiA56+H55qE5paH56ug5YiG5L2I5LqG77yBDQoNCiMjIyDmn6XnnIvnibnlrprkuLvpoYznmoTmlofnq6ANCisg6YCP6YGO5om+5Yiw54m55a6a5paH56ug55qE5YiG5L2I6YCy6KGM5o6S5bqP5LmL5b6M77yM5Y+v5Lul55yL5Yiw5q2k5Li76aGM55qE5q+U6YeN6auY55qE5paH56ug5Zyo6KiO6KuW5LuA6bq844CCDQoNCmBgYHtyICxldmFsPUZBTFNFfQ0KbmV3c190b3BpYyAlPiUNCiAgYXJyYW5nZShkZXNjKGBBWueWq+iLl2ApKSAlPiVoZWFkKDEwKSANCmBgYA0KDQojIyMg5LqG6Kej5Li76aGM5Zyo5pmC6ZaT55qE6K6K5YyWDQpgYGB7ciB3YXJuaW5nPUZBTFNFfQ0KbmV3c190b3BpYyAlPiUgDQogIG11dGF0ZShhcnREYXRlID0gYXMuRGF0ZShhcnREYXRlKSkgJT4lDQogIGdyb3VwX2J5KGFydERhdGUgPSBmb3JtYXQoYXJ0RGF0ZSwnJVklbScpKSAlPiUNCiAgc3VtbWFyaXNlX2lmKGlzLm51bWVyaWMsIHN1bSwgbmEucm0gPSBUUlVFKSAlPiUNCiAgbWVsdChpZC52YXJzID0gImFydERhdGUiKSU+JQ0KICBnZ3Bsb3QoIGFlcyh4PWFydERhdGUsIHk9dmFsdWUsIGZpbGw9dmFyaWFibGUpKSArIA0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKyB5bGFiKCJ2YWx1ZSIpICsgDQogIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcz1teWNvbG9yc1tjKDEsNSw4LDEyKV0pKw0KICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDkwLCBoanVzdCA9IDEpKQ0KYGBgDQoNCiMjIyDljrvpmaTnrYbmlbjlsJHmnIjku70NCmBgYHtyIHdhcm5pbmc9RkFMU0V9DQpuZXdzX3RvcGljICU+JQ0KICBtdXRhdGUoYXJ0RGF0ZSA9IGFzLkRhdGUoYXJ0RGF0ZSkpICU+JSANCiAgZmlsdGVyKCAhZm9ybWF0KGFydERhdGUsJyVZJW0nKSAlaW4lIGMoMjAyMDExLDIwMjEwNSkpJT4lDQogIGdyb3VwX2J5KGFydERhdGUgPSBmb3JtYXQoYXJ0RGF0ZSwnJVklbScpKSAlPiUNCiAgc3VtbWFyaXNlX2lmKGlzLm51bWVyaWMsIHN1bSwgbmEucm0gPSBUUlVFKSAlPiUNCiAgbWVsdChpZC52YXJzID0gImFydERhdGUiKSU+JQ0KICBnZ3Bsb3QoIGFlcyh4PWFydERhdGUsIHk9dmFsdWUsIGZpbGw9dmFyaWFibGUpKSArIA0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKyB5bGFiKCJ2YWx1ZSIpICsgDQogICAgc2NhbGVfZmlsbF9tYW51YWwodmFsdWVzPW15Y29sb3JzW2MoMSw1LDgsMTIpXSkrDQogICAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA5MCwgaGp1c3QgPSAxKSkNCmBgYA0KDQojIyMg5Lul5q+U5L6L5LqG6Kej5Li76aGM5pmC6ZaT6K6K5YyWDQpgYGB7ciB3YXJuaW5nPUZBTFNFfQ0KbmV3c190b3BpYyAlPiUNCiAgbXV0YXRlKGFydERhdGUgPSBhcy5EYXRlKGFydERhdGUpKSAlPiUgDQogIGZpbHRlciggIWZvcm1hdChhcnREYXRlLCclWSVtJykgJWluJSBjKDIwMjAxMSwyMDIxMDUpKSU+JQ0KICBncm91cF9ieShhcnREYXRlID0gZm9ybWF0KGFydERhdGUsJyVZJW0nKSkgJT4lDQogIHN1bW1hcmlzZV9pZihpcy5udW1lcmljLCBzdW0sIG5hLnJtID0gVFJVRSkgJT4lDQogIG1lbHQoaWQudmFycyA9ICJhcnREYXRlIiklPiUNCiAgZ3JvdXBfYnkoYXJ0RGF0ZSklPiUNCiAgbXV0YXRlKHRvdGFsX3ZhbHVlID1zdW0odmFsdWUpKSU+JQ0KICBnZ3Bsb3QoIGFlcyh4PWFydERhdGUsIHk9dmFsdWUvdG90YWxfdmFsdWUsIGZpbGw9dmFyaWFibGUpKSArIA0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKyB5bGFiKCJwcm9wb3J0aW9uIikgKyANCiAgICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXM9bXljb2xvcnNbYygxLDUsOCwxMildKSsNCiAgICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDkwLCBoanVzdCA9IDEpKQ0KYGBgDQoNCiMg6KOc5YWFIC0g5LiN5ZCM6KiT57e0TERB5qih5Z6L5aWX5Lu2DQoNCj4g5Y+D6ICDIGh0dHA6Ly90ZXh0MnZlYy5vcmcvdG9waWNfbW9kZWxpbmcuaHRtbCNsYXRlbnRfZGlyaWNobGV0X2FsbG9jYXRpb24NCg0KYGBge3J9DQpsaWJyYXJ5KHRleHQydmVjKQ0KbGlicmFyeSh1ZHBpcGUpDQp0b2tlbnMgPC0gbWV0YWRhdGEgJT4lDQogIHVubmVzdF90b2tlbnMod29yZCwgc2VudGVuY2UsIHRva2VuPW5ld3NfdG9rZW5pemVyKSAlPiUNCiAgZmlsdGVyKCFzdHJfZGV0ZWN0KHdvcmQsIHJlZ2V4KCJbMC05YS16QS1aXSIpKXwgc3RyX2RldGVjdCh3b3JkLCByZWdleCgiW0FhXVtael0iKSkpDQpgYGANCg0KIyMg5bu656uLRFRNIG1hdHJpeA0KYGBge3J9DQpkdGYgPC0gZG9jdW1lbnRfdGVybV9mcmVxdWVuY2llcyh0b2tlbnMsIGRvY3VtZW50ID0gImFydFVybCIsIHRlcm0gPSAid29yZCIpDQpkdG0gPC0gZG9jdW1lbnRfdGVybV9tYXRyaXgoeCA9IGR0ZikNCmR0bV9jbGVhbiA8LSBkdG1fcmVtb3ZlX2xvd2ZyZXEoZHRtLCBtaW5mcmVxID0gMzApDQpkaW0oZHRtX2NsZWFuKQ0KYGBgDQoNCiMjIExEQSDmqKHlnosNCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQoNCnNldC5zZWVkKDIwMTkpDQoNCnRvcGljX24gPSA0DQoNCmxkYV9tb2RlbCA9dGV4dDJ2ZWM6OkxEQSRuZXcobl90b3BpY3MgPSB0b3BpY19uLGRvY190b3BpY19wcmlvciA9IDAuMSwgdG9waWNfd29yZF9wcmlvciA9IDAuMDAxKQ0KZG9jX3RvcGljX2Rpc3RyID1sZGFfbW9kZWwkZml0X3RyYW5zZm9ybShkdG1fY2xlYW4sIG5faXRlciA9IDEwMDAsIGNvbnZlcmdlbmNlX3RvbCA9IDFlLTUsY2hlY2tfY29udmVyZ2VuY2VfZXZlcnlfbiA9IDEwMCkNCmBgYA0KDQo+IOmAmeWAi+avlHRvcGljbW9kZWxz55qEcGFja2FnZei3keW/q+i2heWkmuWAjQ0KDQojIyDkuIDmqKPlj6/ku6XnlKhMREF2aXPnmoTlpZfku7bkvobnnIsNCmBgYHtyfQ0KbGRhX21vZGVsJGdldF90b3Bfd29yZHMobiA9IDEwLCBsYW1iZGEgPSAwLjUpICMjIOafpeeciyDliY0xMOS4u+mhjOWtlw0KbGRhX21vZGVsJHBsb3QoKQ0KIyBsZGFfbW9kZWwkcGxvdChvdXQuZGlyID0ibGRhX3Jlc3VsdCIsIG9wZW4uYnJvd3NlciA9IFRSVUUpDQpgYGA=